summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/NOTES6
-rw-r--r--sys/conf/files60
-rw-r--r--sys/conf/options4
-rw-r--r--sys/dev/raidframe/rf_acctrace.c174
-rw-r--r--sys/dev/raidframe/rf_acctrace.h134
-rw-r--r--sys/dev/raidframe/rf_alloclist.c190
-rw-r--r--sys/dev/raidframe/rf_alloclist.h60
-rw-r--r--sys/dev/raidframe/rf_archs.h75
-rw-r--r--sys/dev/raidframe/rf_aselect.c496
-rw-r--r--sys/dev/raidframe/rf_aselect.h43
-rw-r--r--sys/dev/raidframe/rf_bsd.h152
-rw-r--r--sys/dev/raidframe/rf_callback.c96
-rw-r--r--sys/dev/raidframe/rf_callback.h65
-rw-r--r--sys/dev/raidframe/rf_chaindecluster.c292
-rw-r--r--sys/dev/raidframe/rf_chaindecluster.h68
-rw-r--r--sys/dev/raidframe/rf_configure.h99
-rw-r--r--sys/dev/raidframe/rf_copyback.c433
-rw-r--r--sys/dev/raidframe/rf_copyback.h61
-rw-r--r--sys/dev/raidframe/rf_cvscan.c441
-rw-r--r--sys/dev/raidframe/rf_cvscan.h85
-rw-r--r--sys/dev/raidframe/rf_dag.h239
-rw-r--r--sys/dev/raidframe/rf_dagdegrd.c1132
-rw-r--r--sys/dev/raidframe/rf_dagdegrd.h64
-rw-r--r--sys/dev/raidframe/rf_dagdegwr.c846
-rw-r--r--sys/dev/raidframe/rf_dagdegwr.h55
-rw-r--r--sys/dev/raidframe/rf_dagffrd.c441
-rw-r--r--sys/dev/raidframe/rf_dagffrd.h53
-rw-r--r--sys/dev/raidframe/rf_dagffwr.c2131
-rw-r--r--sys/dev/raidframe/rf_dagffwr.h77
-rw-r--r--sys/dev/raidframe/rf_dagflags.h68
-rw-r--r--sys/dev/raidframe/rf_dagfuncs.c906
-rw-r--r--sys/dev/raidframe/rf_dagfuncs.h90
-rw-r--r--sys/dev/raidframe/rf_dagutils.c1299
-rw-r--r--sys/dev/raidframe/rf_dagutils.h121
-rw-r--r--sys/dev/raidframe/rf_debugMem.c208
-rw-r--r--sys/dev/raidframe/rf_debugMem.h88
-rw-r--r--sys/dev/raidframe/rf_debugprint.c136
-rw-r--r--sys/dev/raidframe/rf_debugprint.h44
-rw-r--r--sys/dev/raidframe/rf_decluster.c747
-rw-r--r--sys/dev/raidframe/rf_decluster.h141
-rw-r--r--sys/dev/raidframe/rf_declusterPQ.c493
-rw-r--r--sys/dev/raidframe/rf_declusterPQ.h52
-rw-r--r--sys/dev/raidframe/rf_desc.h113
-rw-r--r--sys/dev/raidframe/rf_diskqueue.c593
-rw-r--r--sys/dev/raidframe/rf_diskqueue.h208
-rw-r--r--sys/dev/raidframe/rf_disks.c1140
-rw-r--r--sys/dev/raidframe/rf_disks.h108
-rw-r--r--sys/dev/raidframe/rf_driver.c1050
-rw-r--r--sys/dev/raidframe/rf_driver.h79
-rw-r--r--sys/dev/raidframe/rf_engine.c812
-rw-r--r--sys/dev/raidframe/rf_engine.h48
-rw-r--r--sys/dev/raidframe/rf_etimer.h95
-rw-r--r--sys/dev/raidframe/rf_evenodd.c559
-rw-r--r--sys/dev/raidframe/rf_evenodd.h55
-rw-r--r--sys/dev/raidframe/rf_evenodd_dagfuncs.c977
-rw-r--r--sys/dev/raidframe/rf_evenodd_dagfuncs.h79
-rw-r--r--sys/dev/raidframe/rf_evenodd_dags.c191
-rw-r--r--sys/dev/raidframe/rf_evenodd_dags.h64
-rw-r--r--sys/dev/raidframe/rf_fifo.c238
-rw-r--r--sys/dev/raidframe/rf_fifo.h62
-rw-r--r--sys/dev/raidframe/rf_freebsdkintf.c3192
-rw-r--r--sys/dev/raidframe/rf_freelist.h702
-rw-r--r--sys/dev/raidframe/rf_general.h107
-rw-r--r--sys/dev/raidframe/rf_geniq.c165
-rw-r--r--sys/dev/raidframe/rf_hist.h57
-rw-r--r--sys/dev/raidframe/rf_interdecluster.c285
-rw-r--r--sys/dev/raidframe/rf_interdecluster.h60
-rw-r--r--sys/dev/raidframe/rf_invertq.c34
-rw-r--r--sys/dev/raidframe/rf_invertq.h64
-rw-r--r--sys/dev/raidframe/rf_kintf.h82
-rw-r--r--sys/dev/raidframe/rf_layout.c492
-rw-r--r--sys/dev/raidframe/rf_layout.h349
-rw-r--r--sys/dev/raidframe/rf_map.c909
-rw-r--r--sys/dev/raidframe/rf_map.h94
-rw-r--r--sys/dev/raidframe/rf_mcpair.c143
-rw-r--r--sys/dev/raidframe/rf_mcpair.h54
-rw-r--r--sys/dev/raidframe/rf_memchunk.c213
-rw-r--r--sys/dev/raidframe/rf_memchunk.h48
-rw-r--r--sys/dev/raidframe/rf_nwayxor.c451
-rw-r--r--sys/dev/raidframe/rf_nwayxor.h54
-rw-r--r--sys/dev/raidframe/rf_options.c78
-rw-r--r--sys/dev/raidframe/rf_options.h58
-rw-r--r--sys/dev/raidframe/rf_optnames.h105
-rw-r--r--sys/dev/raidframe/rf_paritylog.c871
-rw-r--r--sys/dev/raidframe/rf_paritylog.h181
-rw-r--r--sys/dev/raidframe/rf_paritylogDiskMgr.c703
-rw-r--r--sys/dev/raidframe/rf_paritylogDiskMgr.h42
-rw-r--r--sys/dev/raidframe/rf_paritylogging.c1076
-rw-r--r--sys/dev/raidframe/rf_paritylogging.h70
-rw-r--r--sys/dev/raidframe/rf_parityloggingdags.c675
-rw-r--r--sys/dev/raidframe/rf_parityloggingdags.h59
-rw-r--r--sys/dev/raidframe/rf_parityscan.c445
-rw-r--r--sys/dev/raidframe/rf_parityscan.h67
-rw-r--r--sys/dev/raidframe/rf_pq.c928
-rw-r--r--sys/dev/raidframe/rf_pq.h75
-rw-r--r--sys/dev/raidframe/rf_pqdeg.c219
-rw-r--r--sys/dev/raidframe/rf_pqdeg.h75
-rw-r--r--sys/dev/raidframe/rf_pqdegdags.c432
-rw-r--r--sys/dev/raidframe/rf_pqdegdags.h49
-rw-r--r--sys/dev/raidframe/rf_psstatus.c378
-rw-r--r--sys/dev/raidframe/rf_psstatus.h132
-rw-r--r--sys/dev/raidframe/rf_raid.h299
-rw-r--r--sys/dev/raidframe/rf_raid0.c163
-rw-r--r--sys/dev/raidframe/rf_raid0.h58
-rw-r--r--sys/dev/raidframe/rf_raid1.c691
-rw-r--r--sys/dev/raidframe/rf_raid1.h63
-rw-r--r--sys/dev/raidframe/rf_raid4.c159
-rw-r--r--sys/dev/raidframe/rf_raid4.h57
-rw-r--r--sys/dev/raidframe/rf_raid5.c322
-rw-r--r--sys/dev/raidframe/rf_raid5.h57
-rw-r--r--sys/dev/raidframe/rf_raid5_rotatedspare.c177
-rw-r--r--sys/dev/raidframe/rf_raid5_rotatedspare.h53
-rw-r--r--sys/dev/raidframe/rf_raidframe.h162
-rw-r--r--sys/dev/raidframe/rf_reconbuffer.c468
-rw-r--r--sys/dev/raidframe/rf_reconbuffer.h63
-rw-r--r--sys/dev/raidframe/rf_reconmap.c396
-rw-r--r--sys/dev/raidframe/rf_reconmap.h86
-rw-r--r--sys/dev/raidframe/rf_reconstruct.c1682
-rw-r--r--sys/dev/raidframe/rf_reconstruct.h202
-rw-r--r--sys/dev/raidframe/rf_reconutil.c338
-rw-r--r--sys/dev/raidframe/rf_reconutil.h52
-rw-r--r--sys/dev/raidframe/rf_revent.c230
-rw-r--r--sys/dev/raidframe/rf_revent.h52
-rw-r--r--sys/dev/raidframe/rf_shutdown.c104
-rw-r--r--sys/dev/raidframe/rf_shutdown.h67
-rw-r--r--sys/dev/raidframe/rf_sstf.c658
-rw-r--r--sys/dev/raidframe/rf_sstf.h70
-rw-r--r--sys/dev/raidframe/rf_states.c669
-rw-r--r--sys/dev/raidframe/rf_states.h48
-rw-r--r--sys/dev/raidframe/rf_stripelocks.c669
-rw-r--r--sys/dev/raidframe/rf_stripelocks.h130
-rw-r--r--sys/dev/raidframe/rf_strutils.c58
-rw-r--r--sys/dev/raidframe/rf_threadstuff.c223
-rw-r--r--sys/dev/raidframe/rf_threadstuff.h229
-rw-r--r--sys/dev/raidframe/rf_types.h247
-rw-r--r--sys/dev/raidframe/rf_utils.c149
-rw-r--r--sys/dev/raidframe/rf_utils.h70
-rw-r--r--sys/modules/raidframe/Makefile28
138 files changed, 0 insertions, 42458 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index f2e8ed5..cf5183c 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -1038,12 +1038,6 @@ device ccd #Concatenated disk driver
device vinum #Vinum concat/mirror/raid driver
options VINUMDEBUG #enable Vinum debugging hooks
-# RAIDframe device. RAID_AUTOCONFIG allows RAIDframe to search all of the
-# disk devices in the system looking for components that it recognizes (already
-# configured once before) and auto-configured them into arrays.
-device raidframe
-options RAID_AUTOCONFIG
-
# Kernel side iconv library
options LIBICONV
diff --git a/sys/conf/files b/sys/conf/files
index 339f7db..a905aea 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -606,66 +606,6 @@ dev/puc/puc_pci.c optional puc pci
dev/puc/puc_pccard.c optional puc pccard
dev/puc/puc_sbus.c optional puc sbus
dev/puc/pucdata.c optional puc pci
-dev/raidframe/rf_acctrace.c optional raidframe
-dev/raidframe/rf_alloclist.c optional raidframe
-dev/raidframe/rf_aselect.c optional raidframe
-dev/raidframe/rf_callback.c optional raidframe
-dev/raidframe/rf_chaindecluster.c optional raidframe
-dev/raidframe/rf_copyback.c optional raidframe
-dev/raidframe/rf_cvscan.c optional raidframe
-dev/raidframe/rf_dagdegrd.c optional raidframe
-dev/raidframe/rf_dagdegwr.c optional raidframe
-dev/raidframe/rf_dagffrd.c optional raidframe
-dev/raidframe/rf_dagffwr.c optional raidframe
-dev/raidframe/rf_dagfuncs.c optional raidframe
-dev/raidframe/rf_dagutils.c optional raidframe
-dev/raidframe/rf_debugMem.c optional raidframe
-dev/raidframe/rf_debugprint.c optional raidframe
-dev/raidframe/rf_decluster.c optional raidframe
-dev/raidframe/rf_declusterPQ.c optional raidframe
-dev/raidframe/rf_diskqueue.c optional raidframe
-dev/raidframe/rf_disks.c optional raidframe
-dev/raidframe/rf_driver.c optional raidframe
-dev/raidframe/rf_engine.c optional raidframe
-dev/raidframe/rf_evenodd.c optional raidframe
-dev/raidframe/rf_evenodd_dagfuncs.c optional raidframe
-dev/raidframe/rf_evenodd_dags.c optional raidframe
-dev/raidframe/rf_fifo.c optional raidframe
-dev/raidframe/rf_freebsdkintf.c optional raidframe
-dev/raidframe/rf_interdecluster.c optional raidframe
-dev/raidframe/rf_invertq.c optional raidframe
-dev/raidframe/rf_layout.c optional raidframe
-dev/raidframe/rf_map.c optional raidframe
-dev/raidframe/rf_mcpair.c optional raidframe
-dev/raidframe/rf_memchunk.c optional raidframe
-dev/raidframe/rf_nwayxor.c optional raidframe
-dev/raidframe/rf_options.c optional raidframe
-dev/raidframe/rf_paritylog.c optional raidframe
-dev/raidframe/rf_paritylogDiskMgr.c optional raidframe
-dev/raidframe/rf_paritylogging.c optional raidframe
-dev/raidframe/rf_parityloggingdags.c optional raidframe
-dev/raidframe/rf_parityscan.c optional raidframe
-dev/raidframe/rf_pq.c optional raidframe
-dev/raidframe/rf_pqdeg.c optional raidframe
-dev/raidframe/rf_pqdegdags.c optional raidframe
-dev/raidframe/rf_psstatus.c optional raidframe
-dev/raidframe/rf_raid0.c optional raidframe
-dev/raidframe/rf_raid1.c optional raidframe
-dev/raidframe/rf_raid4.c optional raidframe
-dev/raidframe/rf_raid5.c optional raidframe
-dev/raidframe/rf_raid5_rotatedspare.c optional raidframe
-dev/raidframe/rf_reconbuffer.c optional raidframe
-dev/raidframe/rf_reconmap.c optional raidframe
-dev/raidframe/rf_reconstruct.c optional raidframe
-dev/raidframe/rf_reconutil.c optional raidframe
-dev/raidframe/rf_revent.c optional raidframe
-dev/raidframe/rf_shutdown.c optional raidframe
-dev/raidframe/rf_sstf.c optional raidframe
-dev/raidframe/rf_states.c optional raidframe
-dev/raidframe/rf_stripelocks.c optional raidframe
-dev/raidframe/rf_strutils.c optional raidframe
-dev/raidframe/rf_threadstuff.c optional raidframe
-dev/raidframe/rf_utils.c optional raidframe
dev/random/harvest.c standard
dev/random/randomdev.c optional random
dev/random/yarrow.c optional random
diff --git a/sys/conf/options b/sys/conf/options
index f163ee3..4121e62 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -567,10 +567,6 @@ PCFCLOCK_VERBOSE opt_pcfclock.h
PCFCLOCK_MAX_RETRIES opt_pcfclock.h
TDFX_LINUX opt_tdfx.h
-# RAIDframe options
-RAID_AUTOCONFIG opt_raid.h
-RAID_DEBUG opt_raid.h
-
KTR opt_global.h
KTR_ALQ opt_ktr.h
KTR_MASK opt_ktr.h
diff --git a/sys/dev/raidframe/rf_acctrace.c b/sys/dev/raidframe/rf_acctrace.c
deleted file mode 100644
index 91c1b6d4..0000000
--- a/sys/dev/raidframe/rf_acctrace.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/* $NetBSD: rf_acctrace.c,v 1.4 1999/08/13 03:41:52 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * acctrace.c -- code to support collecting information about each access
- *
- *****************************************************************************/
-
-#if defined(__FreeBSD__)
-#include <sys/types.h>
-#include <sys/time.h>
-#endif
-#include <sys/stat.h>
-#if defined(__NetBSD__)
-#include <sys/types.h>
-#endif
-
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_hist.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-static long numTracesSoFar;
-static int accessTraceBufCount = 0;
-static RF_AccTraceEntry_t *access_tracebuf;
-static long traceCount;
-
-int rf_stopCollectingTraces;
-RF_DECLARE_MUTEX(rf_tracing_mutex)
- int rf_trace_fd;
-
- static void rf_ShutdownAccessTrace(void *);
-
- static void rf_ShutdownAccessTrace(ignored)
- void *ignored;
-{
- if (rf_accessTraceBufSize) {
- if (accessTraceBufCount)
- rf_FlushAccessTraceBuf();
- RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
- }
- rf_mutex_destroy(&rf_tracing_mutex);
-}
-
-int
-rf_ConfigureAccessTrace(listp)
- RF_ShutdownList_t **listp;
-{
- int rc;
-
- numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0;
- if (rf_accessTraceBufSize) {
- RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
- accessTraceBufCount = 0;
- }
- traceCount = 0;
- numTracesSoFar = 0;
- rc = rf_mutex_init(&rf_tracing_mutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- }
- rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- if (rf_accessTraceBufSize) {
- RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
- rf_mutex_destroy(&rf_tracing_mutex);
- }
- }
- return (rc);
-}
-/* install a trace record. cause a flush to disk or to the trace collector daemon
- * if the trace buffer is at least 1/2 full.
- */
-void
-rf_LogTraceRec(raid, rec)
- RF_Raid_t *raid;
- RF_AccTraceEntry_t *rec;
-{
- RF_AccTotals_t *acc = &raid->acc_totals;
-#if 0
- RF_Etimer_t timer;
- int i, n;
-#endif
-
- if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces)))
- return;
-
- /* update AccTotals for this device */
- if (!raid->keep_acc_totals)
- return;
- acc->num_log_ents++;
- if (rec->reconacc) {
- acc->recon_start_to_fetch_us += rec->specific.recon.recon_start_to_fetch_us;
- acc->recon_fetch_to_return_us += rec->specific.recon.recon_fetch_to_return_us;
- acc->recon_return_to_submit_us += rec->specific.recon.recon_return_to_submit_us;
- acc->recon_num_phys_ios += rec->num_phys_ios;
- acc->recon_phys_io_us += rec->phys_io_us;
- acc->recon_diskwait_us += rec->diskwait_us;
- acc->recon_reccount++;
- } else {
- RF_HIST_ADD(acc->tot_hist, rec->total_us);
- RF_HIST_ADD(acc->dw_hist, rec->diskwait_us);
- /* count of physical ios which are too big. often due to
- * thermal recalibration */
- /* if bigvals > 0, you should probably ignore this data set */
- if (rec->diskwait_us > 100000)
- acc->bigvals++;
- acc->total_us += rec->total_us;
- acc->suspend_ovhd_us += rec->specific.user.suspend_ovhd_us;
- acc->map_us += rec->specific.user.map_us;
- acc->lock_us += rec->specific.user.lock_us;
- acc->dag_create_us += rec->specific.user.dag_create_us;
- acc->dag_retry_us += rec->specific.user.dag_retry_us;
- acc->exec_us += rec->specific.user.exec_us;
- acc->cleanup_us += rec->specific.user.cleanup_us;
- acc->exec_engine_us += rec->specific.user.exec_engine_us;
- acc->xor_us += rec->xor_us;
- acc->q_us += rec->q_us;
- acc->plog_us += rec->plog_us;
- acc->diskqueue_us += rec->diskqueue_us;
- acc->diskwait_us += rec->diskwait_us;
- acc->num_phys_ios += rec->num_phys_ios;
- acc->phys_io_us = rec->phys_io_us;
- acc->user_reccount++;
- }
-}
-
-
-/* assumes the tracing mutex is locked at entry. In order to allow this to be called
- * from interrupt context, we don't do any copyouts here, but rather just wake trace
- * buffer collector thread.
- */
-void
-rf_FlushAccessTraceBuf()
-{
- accessTraceBufCount = 0;
-}
diff --git a/sys/dev/raidframe/rf_acctrace.h b/sys/dev/raidframe/rf_acctrace.h
deleted file mode 100644
index c211514..0000000
--- a/sys/dev/raidframe/rf_acctrace.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_acctrace.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * acctrace.h -- header file for acctrace.c
- *
- *****************************************************************************/
-
-
-#ifndef _RF__RF_ACCTRACE_H_
-#define _RF__RF_ACCTRACE_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_hist.h>
-#include <dev/raidframe/rf_etimer.h>
-
-typedef struct RF_user_acc_stats_s {
- RF_uint64 suspend_ovhd_us; /* us spent mucking in the
- * access-suspension code */
- RF_uint64 map_us; /* us spent mapping the access */
- RF_uint64 lock_us; /* us spent locking & unlocking stripes,
- * including time spent blocked */
- RF_uint64 dag_create_us;/* us spent creating the DAGs */
- RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not
- * broken down into components */
- RF_uint64 exec_us; /* us spent in DispatchDAG */
- RF_uint64 exec_engine_us; /* us spent in engine, not including
- * blocking time */
- RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and
- * generally cleaning up */
-} RF_user_acc_stats_t;
-
-typedef struct RF_recon_acc_stats_s {
- RF_uint32 recon_start_to_fetch_us;
- RF_uint32 recon_fetch_to_return_us;
- RF_uint32 recon_return_to_submit_us;
-} RF_recon_acc_stats_t;
-
-typedef struct RF_acctrace_entry_s {
- union {
- RF_user_acc_stats_t user;
- RF_recon_acc_stats_t recon;
- } specific;
- RF_uint8 reconacc; /* whether this is a tracerec for a user acc
- * or a recon acc */
- RF_uint64 xor_us; /* us spent doing XORs */
- RF_uint64 q_us; /* us spent doing XORs */
- RF_uint64 plog_us; /* us spent waiting to stuff parity into log */
- RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl
- * concurrent ops */
- RF_uint64 diskwait_us; /* _total_ us spent waiting actually waiting
- * on the disk, incl concurrent ops */
- RF_uint64 total_us; /* total us spent on this access */
- RF_uint64 num_phys_ios; /* number of physical I/Os invoked */
- RF_uint64 phys_io_us; /* time of physical I/O */
- RF_Etimer_t tot_timer; /* a timer used to compute total access time */
- RF_Etimer_t timer; /* a generic timer val for timing events that
- * live across procedure boundaries */
- RF_Etimer_t recon_timer;/* generic timer for recon stuff */
- RF_uint64 index;
-} RF_AccTraceEntry_t;
-
-typedef struct RF_AccTotals_s {
- /* user acc stats */
- RF_uint64 suspend_ovhd_us;
- RF_uint64 map_us;
- RF_uint64 lock_us;
- RF_uint64 dag_create_us;
- RF_uint64 dag_retry_us;
- RF_uint64 exec_us;
- RF_uint64 exec_engine_us;
- RF_uint64 cleanup_us;
- RF_uint64 user_reccount;
- /* recon acc stats */
- RF_uint64 recon_start_to_fetch_us;
- RF_uint64 recon_fetch_to_return_us;
- RF_uint64 recon_return_to_submit_us;
- RF_uint64 recon_io_overflow_count;
- RF_uint64 recon_phys_io_us;
- RF_uint64 recon_num_phys_ios;
- RF_uint64 recon_diskwait_us;
- RF_uint64 recon_reccount;
- /* trace entry stats */
- RF_uint64 xor_us;
- RF_uint64 q_us;
- RF_uint64 plog_us;
- RF_uint64 diskqueue_us;
- RF_uint64 diskwait_us;
- RF_uint64 total_us;
- RF_uint64 num_log_ents;
- RF_uint64 phys_io_overflow_count;
- RF_uint64 num_phys_ios;
- RF_uint64 phys_io_us;
- RF_uint64 bigvals;
- /* histograms */
- RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS];
- RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS];
-} RF_AccTotals_t;
-#if RF_UTILITY == 0
-RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex)
-#endif /* RF_UTILITY == 0 */
-
- int rf_ConfigureAccessTrace(RF_ShutdownList_t ** listp);
- void rf_LogTraceRec(RF_Raid_t * raid, RF_AccTraceEntry_t * rec);
- void rf_FlushAccessTraceBuf(void);
-
-#endif /* !_RF__RF_ACCTRACE_H_ */
diff --git a/sys/dev/raidframe/rf_alloclist.c b/sys/dev/raidframe/rf_alloclist.c
deleted file mode 100644
index 8c8b837..0000000
--- a/sys/dev/raidframe/rf_alloclist.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/* $NetBSD: rf_alloclist.c,v 1.4 1999/08/13 03:41:53 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/****************************************************************************
- *
- * Alloclist.c -- code to manipulate allocation lists
- *
- * an allocation list is just a list of AllocListElem structures. Each
- * such structure contains a fixed-size array of pointers. Calling
- * FreeAList() causes each pointer to be freed.
- *
- ***************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-RF_DECLARE_STATIC_MUTEX(alist_mutex)
- static unsigned int fl_hit_count, fl_miss_count;
-
- static RF_AllocListElem_t *al_free_list = NULL;
- static int al_free_list_count;
-
-#define RF_AL_FREELIST_MAX 256
-
-#define DO_FREE(_p,_sz) RF_Free((_p),(_sz))
-
- static void rf_ShutdownAllocList(void *);
-
- static void rf_ShutdownAllocList(ignored)
- void *ignored;
-{
- RF_AllocListElem_t *p, *pt;
-
- for (p = al_free_list; p;) {
- pt = p;
- p = p->next;
- DO_FREE(pt, sizeof(*pt));
- }
- rf_mutex_destroy(&alist_mutex);
- /*
- printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n",
- fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count),
- fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count));
- */
-}
-
-int
-rf_ConfigureAllocList(listp)
- RF_ShutdownList_t **listp;
-{
- int rc;
-
- rc = rf_mutex_init(&alist_mutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- al_free_list = NULL;
- fl_hit_count = fl_miss_count = al_free_list_count = 0;
- rc = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
- __FILE__, __LINE__, rc);
- rf_mutex_destroy(&alist_mutex);
- return (rc);
- }
- return (0);
-}
-
-
-/* we expect the lists to have at most one or two elements, so we're willing
- * to search for the end. If you ever observe the lists growing longer,
- * increase POINTERS_PER_ALLOC_LIST_ELEMENT.
- */
-void
-rf_real_AddToAllocList(l, p, size, lockflag)
- RF_AllocListElem_t *l;
- void *p;
- int size;
- int lockflag;
-{
- RF_AllocListElem_t *newelem;
-
- for (; l->next; l = l->next)
- RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* find end of list */
-
- RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
- if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) {
- newelem = rf_real_MakeAllocList(lockflag);
- l->next = newelem;
- l = newelem;
- }
- l->pointers[l->numPointers] = p;
- l->sizes[l->numPointers] = size;
- l->numPointers++;
-
-}
-
-
-/* we use the debug_mem_mutex here because we need to lock it anyway to call free.
- * this is probably a bug somewhere else in the code, but when I call malloc/free
- * outside of any lock I have endless trouble with malloc appearing to return the
- * same pointer twice. Since we have to lock it anyway, we might as well use it
- * as the lock around the al_free_list. Note that we can't call Free with the
- * debug_mem_mutex locked.
- */
-void
-rf_FreeAllocList(l)
- RF_AllocListElem_t *l;
-{
- int i;
- RF_AllocListElem_t *temp, *p;
-
- for (p = l; p; p = p->next) {
- RF_ASSERT(p->numPointers >= 0 && p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
- for (i = 0; i < p->numPointers; i++) {
- RF_ASSERT(p->pointers[i]);
- RF_Free(p->pointers[i], p->sizes[i]);
- }
- }
- while (l) {
- temp = l;
- l = l->next;
- if (al_free_list_count > RF_AL_FREELIST_MAX) {
- DO_FREE(temp, sizeof(*temp));
- } else {
- temp->next = al_free_list;
- al_free_list = temp;
- al_free_list_count++;
- }
- }
-}
-
-RF_AllocListElem_t *
-rf_real_MakeAllocList(lockflag)
- int lockflag;
-{
- RF_AllocListElem_t *p;
-
- if (al_free_list) {
- fl_hit_count++;
- p = al_free_list;
- al_free_list = p->next;
- al_free_list_count--;
- } else {
- fl_miss_count++;
- RF_Malloc(p, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *)); /* no allocation locking
- * in kernel, so this is
- * fine */
- }
- if (p == NULL) {
- return (NULL);
- }
- bzero((char *) p, sizeof(RF_AllocListElem_t));
- return (p);
-}
diff --git a/sys/dev/raidframe/rf_alloclist.h b/sys/dev/raidframe/rf_alloclist.h
deleted file mode 100644
index c746452..0000000
--- a/sys/dev/raidframe/rf_alloclist.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_alloclist.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/****************************************************************************
- *
- * alloclist.h -- header file for alloclist.c
- *
- ***************************************************************************/
-
-#ifndef _RF__RF_ALLOCLIST_H_
-#define _RF__RF_ALLOCLIST_H_
-
-#include <dev/raidframe/rf_types.h>
-
-#define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20
-
-struct RF_AllocListElem_s {
- void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT];
- int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT];
- int numPointers;
- RF_AllocListElem_t *next;
-};
-#define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1);
-#define rf_AddToAllocList(_l_,_ptr_,_sz_) rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1)
-
-int rf_ConfigureAllocList(RF_ShutdownList_t ** listp);
-
-#if RF_UTILITY == 0
-void rf_real_AddToAllocList(RF_AllocListElem_t * l, void *p, int size, int lockflag);
-void rf_FreeAllocList(RF_AllocListElem_t * l);
-RF_AllocListElem_t *rf_real_MakeAllocList(int lockflag);
-#endif /* RF_UTILITY == 0 */
-
-#endif /* !_RF__RF_ALLOCLIST_H_ */
diff --git a/sys/dev/raidframe/rf_archs.h b/sys/dev/raidframe/rf_archs.h
deleted file mode 100644
index faef157..0000000
--- a/sys/dev/raidframe/rf_archs.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_archs.h,v 1.11 2001/01/26 04:43:16 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_archs.h -- defines for which architectures you want to
- * include is some particular build of raidframe. Unfortunately,
- * it's difficult to exclude declustering, P+Q, and distributed
- * sparing because the code is intermixed with RAID5 code. This
- * should be fixed.
- *
- * this is really intended only for use in the kernel, where I
- * am worried about the size of the object module. At user level and
- * in the simulator, I don't really care that much, so all the
- * architectures can be compiled together. Note that by itself, turning
- * off these defines does not affect the size of the executable; you
- * have to edit the makefile for that.
- *
- * comment out any line below to eliminate that architecture.
- * the list below includes all the modules that can be compiled
- * out.
- *
- */
-
-#ifndef _RF__RF_ARCHS_H_
-#define _RF__RF_ARCHS_H_
-
-#define RF_INCLUDE_EVENODD 1
-
-#define RF_INCLUDE_RAID5_RS 1
-#define RF_INCLUDE_PARITYLOGGING 1
-
-#define RF_INCLUDE_CHAINDECLUSTER 1
-#define RF_INCLUDE_INTERDECLUSTER 1
-
-#define RF_INCLUDE_PARITY_DECLUSTERING 1
-#define RF_INCLUDE_PARITY_DECLUSTERING_DS 1
-
-#define RF_INCLUDE_RAID0 1
-#define RF_INCLUDE_RAID1 1
-#define RF_INCLUDE_RAID4 1
-#define RF_INCLUDE_RAID5 1
-#define RF_INCLUDE_RAID6 0
-#define RF_INCLUDE_DECL_PQ 0
-
-#define RF_MEMORY_REDZONES 0
-#define RF_RECON_STATS 1
-
-#include <dev/raidframe/rf_options.h>
-
-#endif /* !_RF__RF_ARCHS_H_ */
diff --git a/sys/dev/raidframe/rf_aselect.c b/sys/dev/raidframe/rf_aselect.c
deleted file mode 100644
index 13cdbbe..0000000
--- a/sys/dev/raidframe/rf_aselect.c
+++ /dev/null
@@ -1,496 +0,0 @@
-/* $NetBSD: rf_aselect.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * aselect.c -- algorithm selection code
- *
- *****************************************************************************/
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_map.h>
-
-#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL)
-/* the function below is not used... so don't define it! */
-#else
-static void TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *);
-#endif
-
-static int InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int);
-static void UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *);
-int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t);
-
-
-/******************************************************************************
- *
- * Create and Initialize a dag header and termination node
- *
- *****************************************************************************/
-/*
- * Allocate a DAG header, give it a fresh allocation list and reset its
- * status, successor count, owning array and list link.
- *
- * hdr            - out: receives the new header, or NULL on failure
- * raidPtr        - array stored in the header's raidPtr field
- * memChunkEnable - accepted for interface compatibility; not used here
- *
- * Returns 0 on success, ENOMEM if either the header or its allocation list
- * could not be obtained.
- */
-static int
-InitHdrNode(hdr, raidPtr, memChunkEnable)
-	RF_DagHeader_t **hdr;
-	RF_Raid_t *raidPtr;
-	int     memChunkEnable;
-{
-	/* create and initialize dag hdr */
-	*hdr = rf_AllocDAGHeader();
-	if (*hdr == NULL) {
-		/* defensive: never dereference a header we did not get */
-		return (ENOMEM);
-	}
-	rf_MakeAllocList((*hdr)->allocList);
-	if ((*hdr)->allocList == NULL) {
-		rf_FreeDAGHeader(*hdr);
-		/* do not hand a freed header back to the caller */
-		*hdr = NULL;
-		return (ENOMEM);
-	}
-	(*hdr)->status = rf_enable;
-	(*hdr)->numSuccedents = 0;
-	(*hdr)->raidPtr = raidPtr;
-	(*hdr)->next = NULL;
-	return (0);
-}
-/******************************************************************************
- *
- * Transfer allocation list and mem chunks from one dag to another
- *
- *****************************************************************************/
-#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL)
-/* the function below is not used... so don't define it! */
-#else
-/*
- * Move everything dagb owns over to daga: the allocation-list entries,
- * the memory chunks (regular and "xtra") and the asmList.  On return
- * dagb's allocation list entries are zeroed, its chunk indices are 0 and
- * its asmList is NULL.
- *
- * daga - destination dag header
- * dagb - source dag header, emptied by this call
- */
-static void
-TransferDagMemory(daga, dagb)
-	RF_DagHeader_t *daga;
-	RF_DagHeader_t *dagb;
-{
-	RF_AccessStripeMapHeader_t *end;
-	RF_AllocListElem_t *p;
-	int     i, memChunksXfrd = 0, xtraChunksXfrd = 0;
-
-	/* transfer allocList from dagb to daga */
-	for (p = dagb->allocList; p; p = p->next) {
-		for (i = 0; i < p->numPointers; i++) {
-			rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]);
-			p->pointers[i] = NULL;
-			p->sizes[i] = 0;
-		}
-		p->numPointers = 0;
-	}
-
-	/* transfer chunks from dagb to daga */
-	while ((memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) && (daga->chunkIndex < RF_MAXCHUNKS)) {
-		/* stuff chunks into daga's memChunk array */
-		if (memChunksXfrd < dagb->chunkIndex) {
-			daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd];
-			dagb->memChunk[memChunksXfrd++] = NULL;
-		} else {
-			/*
-			 * memChunk[] is filled through chunkIndex, the counter
-			 * the loop guard bounds by RF_MAXCHUNKS.  Indexing it
-			 * with xtraChunkIndex (as this line used to) stored the
-			 * chunk in an unrelated slot and never advanced
-			 * chunkIndex.
-			 */
-			daga->memChunk[daga->chunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd];
-			dagb->xtraMemChunk[xtraChunksXfrd++] = NULL;
-		}
-	}
-	/* use escape hatch to hold excess chunks */
-	while (memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) {
-		if (memChunksXfrd < dagb->chunkIndex) {
-			daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd];
-			dagb->memChunk[memChunksXfrd++] = NULL;
-		} else {
-			daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd];
-			dagb->xtraMemChunk[xtraChunksXfrd++] = NULL;
-		}
-	}
-	RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex));
-	RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS);
-	RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt);
-	dagb->chunkIndex = 0;
-	dagb->xtraChunkIndex = 0;
-
-	/* transfer asmList from dagb to daga (appended at the tail) */
-	if (dagb->asmList) {
-		if (daga->asmList) {
-			end = daga->asmList;
-			while (end->next)
-				end = end->next;
-			end->next = dagb->asmList;
-		} else
-			daga->asmList = dagb->asmList;
-		dagb->asmList = NULL;
-	}
-}
-#endif /* __NetBSD__ */
-
-/*****************************************************************************************
- *
- * Ensure that all node->dagHdr fields in a dag are consistent
- *
- * IMPORTANT: This routine recursively searches all succedents of the node. If a
- * succedent is encountered whose dagHdr ptr does not require adjusting, that node's
- * succedents WILL NOT BE EXAMINED.
- *
- ****************************************************************************************/
-static void
-UpdateNodeHdrPtr(hdr, node)
-	RF_DagHeader_t *hdr;
-	RF_DagNode_t *node;
-{
-	RF_DagNode_t *succ;
-	int     idx;
-
-	RF_ASSERT(hdr != NULL && node != NULL);
-
-	/*
-	 * Descend first: only successors whose dagHdr differs from hdr are
-	 * visited, so a successor that already points at hdr prunes its whole
-	 * subtree from the walk.
-	 */
-	idx = 0;
-	while (idx < node->numSuccedents) {
-		succ = node->succedents[idx];
-		if (succ->dagHdr != hdr) {
-			UpdateNodeHdrPtr(hdr, succ);
-		}
-		idx++;
-	}
-
-	/* this node is stamped last, after its successors */
-	node->dagHdr = hdr;
-}
-/******************************************************************************
- *
- * Create a DAG to do a read or write operation.
- *
- * create an array of dagLists, one list per parity stripe.
- * return the lists in the array desc->dagArray.
- *
- * Normally, each list contains one dag for the entire stripe. In some
- * tricky cases, we break this into multiple dags, either one per stripe
- * unit or one per block (sector). When this occurs, these dags are returned
- * as a linked list (dagList) which is executed sequentially (to preserve
- * atomic parity updates in the stripe).
- *
- * dags which operate on independent parity groups (stripes) are returned in
- * independent dagLists (distinct elements in desc->dagArray) and may be
- * executed concurrently.
- *
- * Finally, if the SelectionFunc fails to create a dag for a block, we punt
- * and return 1.
- *
- * The above process is performed in two phases:
- * 1) create an array(s) of creation functions (eg stripeFuncs)
- * 2) create dags and concatenate/merge to form the final dag.
- *
- * Because dags are basic blocks (single entry, single exit, unconditional
- * control flow), we can add the following optimizations (future work):
- * first-pass optimizer to allow max concurrency (need all data dependencies)
- * second-pass optimizer to eliminate common subexpressions (need true
- * data dependencies)
- * third-pass optimizer to eliminate dead code (need true data dependencies)
- *****************************************************************************/
-
-#define MAXNSTRIPES 5
-
-/*
- * Select DAG creation functions for every stripe of an access and build the
- * resulting DAG lists in desc->dagArray.
- *
- * Phase 1 asks the layout's SelectionFunc for a creation function per
- * stripe; when none is found it retries per stripe unit, and then per
- * block.  Phase 2 creates one dag header per selected function and links
- * the headers of a stripe into desc->dagArray[i].dags.
- *
- * desc  - access descriptor; numStripes and dagArray are filled in
- * flags - passed through unchanged to the creation functions
- *
- * Returns 0 on success, 1 if no creation function could be found for some
- * block (in which case all temporary bail-out state is released).
- *
- * The stripeFuncs array is consulted by both cleanup loops, so it is
- * released only after those loops have run.
- */
-int
-rf_SelectAlgorithm(desc, flags)
-	RF_RaidAccessDesc_t *desc;
-	RF_RaidAccessFlags_t flags;
-{
-	RF_AccessStripeMapHeader_t *asm_h = desc->asmap;
-	RF_IoType_t type = desc->type;
-	RF_Raid_t *raidPtr = desc->raidPtr;
-	void   *bp = desc->bp;
-
-	RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
-	RF_AccessStripeMap_t *asm_p;
-	RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h;
-	int     i, j, k;
-	RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES];
-	RF_AccessStripeMap_t *asm_up, *asm_bp;
-	RF_AccessStripeMapHeader_t ***asmh_u, *endASMList;
-	RF_AccessStripeMapHeader_t ***asmh_b;
-	RF_VoidFuncPtr **stripeUnitFuncs, uFunc;
-	RF_VoidFuncPtr **blockFuncs, bFunc;
-	int     numStripesBailed = 0, cantCreateDAGs = RF_FALSE;
-	int     numStripeUnitsBailed = 0;
-	int     stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0;
-	RF_StripeNum_t numStripeUnits;
-	RF_SectorNum_t numBlocks;
-	RF_RaidAddr_t address;
-	int     length;
-	RF_PhysDiskAddr_t *physPtr;
-	caddr_t buffer;
-
-	lastdag_h = NULL;
-	asmh_u = asmh_b = NULL;
-	stripeUnitFuncs = NULL;
-	blockFuncs = NULL;
-
-	/* get an array of dag-function creation pointers, try to avoid
-	 * calling malloc */
-	if (asm_h->numStripes <= MAXNSTRIPES)
-		stripeFuncs = normalStripeFuncs;
-	else
-		RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
-
-	/* walk through the asm list once collecting information */
-	/* attempt to find a single creation function for each stripe */
-	desc->numStripes = 0;
-	for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) {
-		desc->numStripes++;
-		(raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_p, &stripeFuncs[i]);
-		/* check to see if we found a creation func for this stripe */
-		if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) {
-			/* could not find creation function for entire stripe
-			 * so, let's see if we can find one for each stripe
-			 * unit in the stripe */
-
-			if (numStripesBailed == 0) {
-				/* one stripe map header for each stripe we
-				 * bail on */
-				RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***));
-				/* create an array of ptrs to arrays of
-				 * stripeFuncs */
-				RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **));
-			}
-			/* create an array of creation funcs (called
-			 * stripeFuncs) for this stripe */
-			numStripeUnits = asm_p->numStripeUnitsAccessed;
-			RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
-			RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **));
-
-			/* lookup array of stripeUnitFuncs for this stripe */
-			for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
-				/* remap for series of single stripe-unit
-				 * accesses */
-				address = physPtr->raidAddress;
-				length = physPtr->numSector;
-				buffer = physPtr->bufPtr;
-
-				asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP);
-				asm_up = asmh_u[numStripesBailed][j]->stripeMap;
-
-				/* get the creation func for this stripe unit */
-				(raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j]));
-
-				/* check to see if we found a creation func
-				 * for this stripe unit */
-				if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL) {
-					/* could not find creation function
-					 * for stripe unit so, let's see if we
-					 * can find one for each block in the
-					 * stripe unit */
-					if (numStripeUnitsBailed == 0) {
-						/* one stripe map header for
-						 * each stripe unit we bail on */
-						RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***));
-						/* create an array of ptrs to
-						 * arrays of blockFuncs */
-						RF_Calloc(blockFuncs, asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **));
-					}
-					/* create an array of creation funcs
-					 * (called blockFuncs) for this stripe
-					 * unit */
-					numBlocks = physPtr->numSector;
-					numBlockDags += numBlocks;
-					RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
-					RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **));
-
-					/* lookup array of blockFuncs for this
-					 * stripe unit */
-					for (k = 0; k < numBlocks; k++) {
-						/* remap for series of single
-						 * block accesses */
-						address = physPtr->raidAddress + k;
-						length = 1;
-						buffer = physPtr->bufPtr + (k * (1 << raidPtr->logBytesPerSector));
-
-						asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP);
-						asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap;
-
-						/* get the creation func for
-						 * this block */
-						(raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k]));
-
-						/* check to see if we found a
-						 * creation func for this
-						 * block */
-						if (blockFuncs[numStripeUnitsBailed][k] == NULL)
-							cantCreateDAGs = RF_TRUE;
-					}
-					numStripeUnitsBailed++;
-				} else {
-					numUnitDags++;
-				}
-			}
-			RF_ASSERT(j == numStripeUnits);
-			numStripesBailed++;
-		}
-	}
-
-	if (cantCreateDAGs) {
-		/* free memory and punt */
-		if (numStripesBailed > 0) {
-			stripeNum = 0;
-			stripeUnitNum = 0;
-			for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++)
-				if (stripeFuncs[i] == NULL) {
-					numStripeUnits = asm_p->numStripeUnitsAccessed;
-					/* release the per-block bail-out state
-					 * of this stripe; it was previously
-					 * leaked on this path */
-					for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
-						if (stripeUnitFuncs[stripeNum][j] == NULL) {
-							numBlocks = physPtr->numSector;
-							for (k = 0; k < numBlocks; k++)
-								rf_FreeAccessStripeMap(asmh_b[stripeUnitNum][k]);
-							RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *));
-							RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr));
-							stripeUnitNum++;
-						}
-					}
-					for (j = 0; j < numStripeUnits; j++)
-						rf_FreeAccessStripeMap(asmh_u[stripeNum][j]);
-					RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *));
-					RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr));
-					stripeNum++;
-				}
-			RF_ASSERT(stripeNum == numStripesBailed);
-			RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
-			RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
-			if (numStripeUnitsBailed > 0) {
-				RF_ASSERT(stripeUnitNum == numStripeUnitsBailed);
-				RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr));
-				RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
-			}
-		}
-		/* stripeFuncs[] is read by the loop above, so it must be
-		 * released only now */
-		if (asm_h->numStripes > MAXNSTRIPES)
-			RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
-		return (1);
-	} else {
-		/* begin dag creation */
-		stripeNum = 0;
-		stripeUnitNum = 0;
-
-		/* create an array of dagLists and fill them in */
-		RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList);
-
-		for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) {
-			/* grab dag header for this stripe */
-			dag_h = NULL;
-			desc->dagArray[i].desc = desc;
-
-			if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) {
-				/* use bailout functions for this stripe */
-				for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
-					uFunc = stripeUnitFuncs[stripeNum][j];
-					if (uFunc == (RF_VoidFuncPtr) NULL) {
-						/* use bailout functions for
-						 * this stripe unit */
-						for (k = 0; k < physPtr->numSector; k++) {
-							/* create a dag for
-							 * this block */
-							InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
-							desc->dagArray[i].numDags++;
-							if (dag_h == NULL) {
-								dag_h = tempdag_h;
-							} else {
-								lastdag_h->next = tempdag_h;
-							}
-							lastdag_h = tempdag_h;
-
-							bFunc = blockFuncs[stripeUnitNum][k];
-							RF_ASSERT(bFunc);
-							asm_bp = asmh_b[stripeUnitNum][k]->stripeMap;
-							(*bFunc) (raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList);
-						}
-						stripeUnitNum++;
-					} else {
-						/* create a dag for this unit */
-						InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
-						desc->dagArray[i].numDags++;
-						if (dag_h == NULL) {
-							dag_h = tempdag_h;
-						} else {
-							lastdag_h->next = tempdag_h;
-						}
-						lastdag_h = tempdag_h;
-
-						asm_up = asmh_u[stripeNum][j]->stripeMap;
-						(*uFunc) (raidPtr, asm_up, tempdag_h, bp, flags, tempdag_h->allocList);
-					}
-				}
-				RF_ASSERT(j == asm_p->numStripeUnitsAccessed);
-				/* merge linked bailout dag to existing dag
-				 * collection */
-				stripeNum++;
-			} else {
-				/* Create a dag for this parity stripe */
-				InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
-				desc->dagArray[i].numDags++;
-				if (dag_h == NULL) {
-					dag_h = tempdag_h;
-				} else {
-					lastdag_h->next = tempdag_h;
-				}
-				lastdag_h = tempdag_h;
-
-				(stripeFuncs[i]) (raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList);
-			}
-			desc->dagArray[i].dags = dag_h;
-		}
-		RF_ASSERT(i == desc->numStripes);
-
-		/* free memory */
-		if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0)) {
-			stripeNum = 0;
-			stripeUnitNum = 0;
-			if (dag_h->asmList) {
-				endASMList = dag_h->asmList;
-				while (endASMList->next)
-					endASMList = endASMList->next;
-			} else
-				endASMList = NULL;
-			/* walk through io, stripe by stripe */
-			for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++)
-				if (stripeFuncs[i] == NULL) {
-					numStripeUnits = asm_p->numStripeUnitsAccessed;
-					/* walk through stripe, stripe unit by
-					 * stripe unit */
-					for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
-						if (stripeUnitFuncs[stripeNum][j] == NULL) {
-							numBlocks = physPtr->numSector;
-							/* walk through stripe
-							 * unit, block by
-							 * block */
-							for (k = 0; k < numBlocks; k++) {
-								if (dag_h->asmList == NULL) {
-									dag_h->asmList = asmh_b[stripeUnitNum][k];
-									endASMList = dag_h->asmList;
-								} else {
-									endASMList->next = asmh_b[stripeUnitNum][k];
-									endASMList = endASMList->next;
-								}
-							}
-							RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *));
-							RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr));
-							stripeUnitNum++;
-						}
-						if (dag_h->asmList == NULL) {
-							dag_h->asmList = asmh_u[stripeNum][j];
-							endASMList = dag_h->asmList;
-						} else {
-							endASMList->next = asmh_u[stripeNum][j];
-							endASMList = endASMList->next;
-						}
-					}
-					RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *));
-					RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr));
-					stripeNum++;
-				}
-			RF_ASSERT(stripeNum == numStripesBailed);
-			RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
-			RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
-			if (numStripeUnitsBailed > 0) {
-				RF_ASSERT(stripeUnitNum == numStripeUnitsBailed);
-				RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr));
-				RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
-			}
-		}
-		/* stripeFuncs[] is read by the cleanup walk above, so it must
-		 * be released only now */
-		if (asm_h->numStripes > MAXNSTRIPES)
-			RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
-		return (0);
-	}
-}
diff --git a/sys/dev/raidframe/rf_aselect.h b/sys/dev/raidframe/rf_aselect.h
deleted file mode 100644
index de9cd76..0000000
--- a/sys/dev/raidframe/rf_aselect.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_aselect.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * aselect.h -- header file for algorithm selection code
- *
- *****************************************************************************/
-
-#ifndef _RF__RF_ASELECT_H_
-#define _RF__RF_ASELECT_H_
-
-#include <dev/raidframe/rf_desc.h>
-
-int rf_SelectAlgorithm(RF_RaidAccessDesc_t * desc, RF_RaidAccessFlags_t flags);
-
-#endif /* !_RF__RF_ASELECT_H_ */
diff --git a/sys/dev/raidframe/rf_bsd.h b/sys/dev/raidframe/rf_bsd.h
deleted file mode 100644
index 14c10f5..0000000
--- a/sys/dev/raidframe/rf_bsd.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_netbsd.h,v 1.12 2000/05/28 22:53:49 oster Exp $ */
-
-/*-
- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Greg Oster; Jason R. Thorpe.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RF__RF_BSD_H_
-#define _RF__RF_BSD_H_
-
-#ifdef _KERNEL
-#include <sys/fcntl.h>
-#include <sys/systm.h>
-#include <sys/namei.h>
-#include <sys/vnode.h>
-#include "opt_raid.h"
-
-/*
- * rf_printf(lvl, fmt, ...) -- level-gated debug printf.
- *
- * With RAID_DEBUG defined, the message is printed when lvl <= RAID_DEBUG.
- * Without it, the macro expands to an empty statement and none of its
- * arguments are evaluated.
- *
- * Both variants are wrapped in do { } while(0) so that
- * "if (x) rf_printf(...); else ..." parses the same way in either build.
- */
-#ifdef RAID_DEBUG
-#define rf_printf(lvl, fmt, args...) \
-	do { \
-		if ((lvl) <= RAID_DEBUG) printf(fmt, ##args); \
-	} while(0)
-
-#else /* !RAID_DEBUG */
-#define rf_printf(lvl, fmt, args...) do { } while(0)
-#endif /* RAID_DEBUG */
-#endif /* _KERNEL */
-
-/*
- * The per-component label information that the user can set: the
- * component's position (row, column) and the serial number of the RAID set.
- */
-typedef struct RF_ComponentInfo_s {
- int row; /* the row number of this component */
- int column; /* the column number of this component */
- int serial_number; /* a user-specified serial number for this
- RAID set */
-} RF_ComponentInfo_t;
-
-/*
- * The per-component label information.
- *
- * Every field is an int.  The two future_use arrays reserve space for
- * later additions.
- * NOTE(review): the field order presumably matches a stored label layout;
- * confirm before reordering or resizing anything here.
- */
-typedef struct RF_ComponentLabel_s {
- int version; /* The version of this label. */
- int serial_number; /* a user-specified serial number for this
- RAID set */
- int mod_counter; /* modification counter. Changed (usually
- by incrementing) every time the label
- is changed */
- int row; /* the row number of this component */
- int column; /* the column number of this component */
- int num_rows; /* number of rows in this RAID set */
- int num_columns; /* number of columns in this RAID set */
- int clean; /* 1 when clean, 0 when dirty */
- int status; /* rf_ds_optimal, rf_ds_dist_spared, whatever. */
- /* stuff that will be in version 2 of the label */
- int sectPerSU; /* Sectors per Stripe Unit */
- int SUsPerPU; /* Stripe Units per Parity Units */
- int SUsPerRU; /* Stripe Units per Reconstruction Units */
- int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */
- int maxOutstanding; /* maxOutstanding disk requests */
- int blockSize; /* size of component block.
- (disklabel->d_secsize) */
- int numBlocks; /* number of blocks on this component. May
- be smaller than the partition size. */
- int partitionSize; /* number of blocks on this *partition*.
- Must exactly match the partition size
- from the disklabel. */
- int future_use[33]; /* Future expansion */
- int autoconfigure; /* automatically configure this RAID set.
- 0 == no, 1 == yes */
- int root_partition; /* Use this set as /
- 0 == no, 1 == yes*/
- int last_unit; /* last unit number (e.g. 0 for /dev/raid0)
- of this component. Used for autoconfigure
- only. */
- int config_order; /* 0 .. n. The order in which the component
- should be auto-configured. E.g. 0 will be
- done first, (and would become raid0).
- This may be in conflict with last_unit!!?! */
- /* Not currently used. */
- int future_use2[44]; /* More future expansion */
-} RF_ComponentLabel_t;
-
-/*
- * Names one component together with a row and column.
- * NOTE(review): presumably the component's position in the array, as in
- * RF_ComponentInfo_t -- confirm against the callers.
- */
-typedef struct RF_SingleComponent_s {
- int row;
- int column;
- char component_name[50]; /* name of the component */
-} RF_SingleComponent_t;
-
-#ifdef _KERNEL
-
-/*
- * Kernel-only per-component information: the component device's vnode and
- * dev_t plus an embedded copy of its RAIDframe label.  The size/path fields
- * inside "#if 0" are compiled out.
- */
-struct raidcinfo {
- struct vnode *ci_vp; /* component device's vnode */
- dev_t ci_dev; /* component device's dev_t */
- RF_ComponentLabel_t ci_label; /* components RAIDframe label */
-#if 0
- size_t ci_size; /* size */
- char *ci_path; /* path to component */
- size_t ci_pathlen; /* length of component path */
-#endif
-};
-
-
-
-/* XXX probably belongs in a different .h file. */
-/*
- * One node of a singly linked list (via "next") describing a component of
- * a configuration set: its name, device, vnode and a pointer to its label.
- */
-typedef struct RF_AutoConfig_s {
- char devname[56]; /* the name of this component */
- int flag; /* a general-purpose flag */
- dev_t dev; /* the device for this component */
- struct vnode *vp; /* Mr. Vnode Pointer */
- RF_ComponentLabel_t *clabel; /* the label */
- struct RF_AutoConfig_s *next; /* the next autoconfig structure
- in this set. */
-} RF_AutoConfig_t;
-
-/*
- * One configuration set: the list of RF_AutoConfig_s structures that belong
- * to it, plus a link to the next set.
- */
-typedef struct RF_ConfigSet_s {
- struct RF_AutoConfig_s *ac; /* all of the autoconfig structures for
- this config set. */
- int rootable; /* Set to 1 if this set can be root */
- struct RF_ConfigSet_s *next; /* next config set in the list */
-} RF_ConfigSet_t;
-
-#endif /* _KERNEL */
-#endif /* _RF__RF_BSD_H_ */
diff --git a/sys/dev/raidframe/rf_callback.c b/sys/dev/raidframe/rf_callback.c
deleted file mode 100644
index 1739fc6..0000000
--- a/sys/dev/raidframe/rf_callback.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* $NetBSD: rf_callback.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************************
- *
- * callback.c -- code to manipulate callback descriptor
- *
- ****************************************************************************************/
-
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_callback.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-static RF_FreeList_t *rf_callback_freelist;
-
-#define RF_MAX_FREE_CALLBACK 64
-#define RF_CALLBACK_INC 4
-#define RF_CALLBACK_INITIAL 4
-
-static void rf_ShutdownCallback(void *);
-
-/*
- * Shutdown hook: destroys the callback-descriptor freelist.  The argument
- * is not used.
- */
-static void
-rf_ShutdownCallback(void *ignored)
-{
-	RF_FREELIST_DESTROY(rf_callback_freelist, next, (RF_CallbackDesc_t *));
-}
-
-/*
- * Create the callback-descriptor freelist, register its teardown on the
- * shutdown list and prime it with RF_CALLBACK_INITIAL entries.
- *
- * Returns 0 on success, ENOMEM if the freelist could not be created, or
- * the error from rf_ShutdownCreate() (after destroying the freelist again).
- */
-int
-rf_ConfigureCallback(RF_ShutdownList_t **listp)
-{
-	int     status;
-
-	RF_FREELIST_CREATE(rf_callback_freelist, RF_MAX_FREE_CALLBACK,
-	    RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t));
-	if (rf_callback_freelist == NULL)
-		return (ENOMEM);
-
-	status = rf_ShutdownCreate(listp, rf_ShutdownCallback, NULL);
-	if (status == 0) {
-		RF_FREELIST_PRIME(rf_callback_freelist, RF_CALLBACK_INITIAL, next,
-		    (RF_CallbackDesc_t *));
-		return (0);
-	}
-
-	/* could not register the shutdown hook: undo the creation by hand */
-	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
-	    __LINE__, status);
-	rf_ShutdownCallback(NULL);
-	return (status);
-}
-
-/*
- * Take one callback descriptor from the freelist and return it.
- */
-RF_CallbackDesc_t *
-rf_AllocCallbackDesc(void)
-{
-	RF_CallbackDesc_t *desc;
-
-	RF_FREELIST_GET(rf_callback_freelist, desc, next, (RF_CallbackDesc_t *));
-	return (desc);
-}
-
-/*
- * Hand a callback descriptor back to the freelist.
- */
-void
-rf_FreeCallbackDesc(RF_CallbackDesc_t *p)
-{
-	RF_FREELIST_FREE(rf_callback_freelist, p, next);
-}
diff --git a/sys/dev/raidframe/rf_callback.h b/sys/dev/raidframe/rf_callback.h
deleted file mode 100644
index feda31d..0000000
--- a/sys/dev/raidframe/rf_callback.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_callback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************************
- *
- * callback.h -- header file for callback.c
- *
- * the reconstruction code must manage concurrent I/Os on multiple drives.
- * it sometimes needs to suspend operation on a particular drive until some
- * condition occurs. we can't block the thread, of course, or we wouldn't
- * be able to manage our other outstanding I/Os. Instead we just suspend
- * new activity on the indicated disk, and create a callback descriptor and
- * put it someplace where it will get invoked when the condition that's
- * stalling us has cleared. When the descriptor is invoked, it will call
- * a function that will restart operation on the indicated disk.
- *
- ****************************************************************************************/
-
-#ifndef _RF__RF_CALLBACK_H_
-#define _RF__RF_CALLBACK_H_
-
-#include <dev/raidframe/rf_types.h>
-
-/*
- * Callback descriptor: a function to invoke, two opaque arguments and a
- * disk (row, col).  Descriptors are chained through "next".
- */
-struct RF_CallbackDesc_s {
- void (*callbackFunc) (RF_CBParam_t); /* function to call */
- RF_CBParam_t callbackArg; /* args to give to function, or just
- * info about this callback */
- RF_CBParam_t callbackArg2; /* second argument / info word */
- RF_RowCol_t row; /* disk row and column IDs to give to the
- * callback func */
- RF_RowCol_t col;
- RF_CallbackDesc_t *next;/* next entry in list */
-};
-
-int rf_ConfigureCallback(RF_ShutdownList_t ** listp);
-RF_CallbackDesc_t *rf_AllocCallbackDesc(void);
-void rf_FreeCallbackDesc(RF_CallbackDesc_t * p);
-
-#endif /* !_RF__RF_CALLBACK_H_ */
diff --git a/sys/dev/raidframe/rf_chaindecluster.c b/sys/dev/raidframe/rf_chaindecluster.c
deleted file mode 100644
index 68951a0..0000000
--- a/sys/dev/raidframe/rf_chaindecluster.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/* $NetBSD: rf_chaindecluster.c,v 1.6 2001/01/26 04:27:16 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Khalil Amiri
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/******************************************************************************
- *
- * rf_chaindecluster.c -- implements chained declustering
- *
- *****************************************************************************/
-
-#include <dev/raidframe/rf_archs.h>
-
-#if (RF_INCLUDE_CHAINDECLUSTER > 0)
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_chaindecluster.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_utils.h>
-
-typedef struct RF_ChaindeclusterConfigInfo_s {
- RF_RowCol_t **stripeIdentifier; /* filled in at config time and used
- * by IdentifyStripe */
- RF_StripeCount_t numSparingRegions;
- RF_StripeCount_t stripeUnitsPerSparingRegion;
- RF_SectorNum_t mirrorStripeOffset;
-} RF_ChaindeclusterConfigInfo_t;
-
-int
-rf_ConfigureChainDecluster(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_StripeCount_t num_used_stripeUnitsPerDisk;
- RF_ChaindeclusterConfigInfo_t *info;
- RF_RowCol_t i;
-
- /* create a Chained Declustering configuration structure */
- RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
-
- /* fill in the config structure. */
- info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList);
- if (info->stripeIdentifier == NULL)
- return (ENOMEM);
- for (i = 0; i < raidPtr->numCol; i++) {
- info->stripeIdentifier[i][0] = i % raidPtr->numCol;
- info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol;
- }
-
- RF_ASSERT(raidPtr->numRow == 1);
-
- /* fill in the remaining layout parameters */
- num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
- (2 * raidPtr->numCol - 2));
- info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2);
- info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
- info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1);
- layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = 1;
- layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numParityCol = 1;
-
- layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
-
- raidPtr->sectorsPerDisk =
- num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
-
- raidPtr->totalSectors =
- (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
-
- layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
-
- return (0);
-}
-
-RF_ReconUnitCount_t
-rf_GetNumSpareRUsChainDecluster(raidPtr)
- RF_Raid_t *raidPtr;
-{
- RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-
- /*
- * The layout uses two stripe units per disk as spare within each
- * sparing region.
- */
- return (2 * info->numSparingRegions);
-}
-
-
-/* Maps to the primary copy of the data, i.e. the first mirror pair */
-void
-rf_MapSectorChainDecluster(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- RF_SectorNum_t index_within_region, index_within_disk;
- RF_StripeNum_t sparing_region_id;
- int col_before_remap;
-
- *row = 0;
- sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
- index_within_region = SUID % info->stripeUnitsPerSparingRegion;
- index_within_disk = index_within_region / raidPtr->numCol;
- col_before_remap = SUID % raidPtr->numCol;
-
- if (!remap) {
- *col = col_before_remap;
- *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) *
- raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- } else {
- /* remap sector to spare space... */
- *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- index_within_disk = index_within_region / raidPtr->numCol;
- if (index_within_disk < col_before_remap)
- *col = index_within_disk;
- else
- if (index_within_disk == raidPtr->numCol - 2) {
- *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol;
- *diskSector += raidPtr->Layout.sectorsPerStripeUnit;
- } else
- *col = (index_within_disk + 2) % raidPtr->numCol;
- }
-
-}
-
-
-
-/* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained
- in the next disk (mod numCol) after the disk containing the primary copy.
- The offset into the disk is one-half disk down */
-void
-rf_MapParityChainDecluster(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- RF_SectorNum_t index_within_region, index_within_disk;
- RF_StripeNum_t sparing_region_id;
- int col_before_remap;
-
- *row = 0;
- if (!remap) {
- *col = SUID % raidPtr->numCol;
- *col = (*col + 1) % raidPtr->numCol;
- *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- } else {
- /* remap parity to spare space ... */
- sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
- index_within_region = SUID % info->stripeUnitsPerSparingRegion;
- index_within_disk = index_within_region / raidPtr->numCol;
- *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- col_before_remap = SUID % raidPtr->numCol;
- if (index_within_disk < col_before_remap)
- *col = index_within_disk;
- else
- if (index_within_disk == raidPtr->numCol - 2) {
- *col = (col_before_remap + 2) % raidPtr->numCol;
- *diskSector -= raidPtr->Layout.sectorsPerStripeUnit;
- } else
- *col = (index_within_disk + 2) % raidPtr->numCol;
- }
-
-}
-
-void
-rf_IdentifyStripeChainDecluster(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- RF_StripeNum_t SUID;
- RF_RowCol_t col;
-
- SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;
- col = SUID % raidPtr->numCol;
- *outRow = 0;
- *diskids = info->stripeIdentifier[col];
-}
-
-void
-rf_MapSIDToPSIDChainDecluster(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
-/******************************************************************************
- * select a graph to perform a single-stripe access
- *
- * Parameters: raidPtr - description of the physical array
- * type - type of operation (read or write) requested
- * asmap - logical & physical addresses for this access
- * createFunc - function to use to create the graph (return value)
- *****************************************************************************/
-
-void
-rf_RAIDCDagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr * createFunc)
-#if 0
- void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *,
- RF_DagHeader_t *, void *, RF_RaidAccessFlags_t,
- RF_AllocListElem_t *)
-#endif
-{
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
- RF_ASSERT(raidPtr->numRow == 1);
-
- if (asmap->numDataFailed + asmap->numParityFailed > 1) {
- RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
- *createFunc = NULL;
- return;
- }
- *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
-
- if (type == RF_IO_TYPE_READ) {
- if ((raidPtr->status[0] == rf_rs_degraded) || (raidPtr->status[0] == rf_rs_reconstructing))
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is
- * degraded, implement
- * workload shifting */
- else
- *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not
- * degraded, so use
- * mirror partition dag */
- } else
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
-}
-#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */
diff --git a/sys/dev/raidframe/rf_chaindecluster.h b/sys/dev/raidframe/rf_chaindecluster.h
deleted file mode 100644
index 6030289..0000000
--- a/sys/dev/raidframe/rf_chaindecluster.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_chaindecluster.h,v 1.4 2001/01/26 04:14:14 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Khalil Amiri
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_chaindecluster.h
- * header file for Chained Declustering
- */
-
-
-#ifndef _RF__RF_CHAINDECLUSTER_H_
-#define _RF__RF_CHAINDECLUSTER_H_
-
-int
-rf_ConfigureChainDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t * raidPtr);
-void
-rf_MapSectorChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-void
-rf_RAIDCDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr *);
-#if 0
-void (**createFunc) (RF_Raid_t *,
- RF_AccessStripeMap_t *,
- RF_DagHeader_t *,
- void *,
- RF_RaidAccessFlags_t,
- RF_AllocListElem_t *);
-#endif
-
-#endif /* !_RF__RF_CHAINDECLUSTER_H_ */
diff --git a/sys/dev/raidframe/rf_configure.h b/sys/dev/raidframe/rf_configure.h
deleted file mode 100644
index c51b8a3..0000000
--- a/sys/dev/raidframe/rf_configure.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_configure.h,v 1.4 1999/03/02 03:18:49 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/********************************
- *
- * rf_configure.h
- *
- * header file for raidframe configuration in the kernel version only.
- * configuration is invoked via ioctl rather than at boot time
- *
- *******************************/
-
-
-#ifndef _RF__RF_CONFIGURE_H_
-#define _RF__RF_CONFIGURE_H_
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-
-#include <sys/param.h>
-#include <sys/proc.h>
-
-#if defined(__NetBSD__)
-#include <sys/ioctl.h>
-#elif defined(__FreeBSD__)
-#include <sys/ioccom.h>
-#include <sys/filio.h>
-#endif
-
-/* the raidframe configuration, passed down through an ioctl.
- * the driver can be reconfigured (with total loss of data) at any time,
- * but it must be shut down first.
- */
-struct RF_Config_s {
- RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns,
- * and spare disks */
- dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks
- * comprising array */
- char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names */
- dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare
- * disks */
- char spare_names[RF_MAXSPARE][50]; /* device names */
- RF_SectorNum_t sectPerSU; /* sectors per stripe unit */
- RF_StripeNum_t SUsPerPU;/* stripe units per parity unit */
- RF_StripeNum_t SUsPerRU;/* stripe units per reconstruction unit */
- RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to
- * be used */
- RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan,
- * not used in kernel */
- char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a
- * disk. not used in kernel. */
- char debugVars[RF_MAXDBGV][50]; /* space for specifying debug
- * variables & their values */
- unsigned int layoutSpecificSize; /* size in bytes of
- * layout-specific info */
- void *layoutSpecific; /* a pointer to a layout-specific structure to
- * be copied in */
- int force; /* if !0, ignore many fatal
- configuration conditions */
- /*
- "force" is used to override cases where the component labels would
- indicate that configuration should not proceed without user
- intervention
- */
-};
-#ifndef _KERNEL
-int rf_MakeConfig(char *configname, RF_Config_t * cfgPtr);
-int rf_MakeLayoutSpecificNULL(FILE * fp, RF_Config_t * cfgPtr, void *arg);
-int rf_MakeLayoutSpecificDeclustered(FILE * configfp, RF_Config_t * cfgPtr, void *arg);
-void *rf_ReadSpareTable(RF_SparetWait_t * req, char *fname);
-#endif /* !_KERNEL */
-
-#endif /* !_RF__RF_CONFIGURE_H_ */
diff --git a/sys/dev/raidframe/rf_copyback.c b/sys/dev/raidframe/rf_copyback.c
deleted file mode 100644
index eb16404..0000000
--- a/sys/dev/raidframe/rf_copyback.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/* $NetBSD: rf_copyback.c,v 1.15 2001/01/26 02:16:24 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************************
- *
- * copyback.c -- code to copy reconstructed data back from spare space to
- * the replaced disk.
- *
- * the code operates using callbacks on the I/Os to continue with the next
- * unit to be copied back. We do this because a simple loop containing blocking I/Os
- * will not work in the simulator.
- *
- ****************************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-
-#if defined(__FreeBSD__)
-#include <sys/types.h>
-#include <sys/systm.h>
-#if __FreeBSD_version > 500005
-#include <sys/bio.h>
-#endif
-#endif
-
-#include <sys/time.h>
-#include <sys/buf.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_copyback.h>
-#include <dev/raidframe/rf_decluster.h>
-#include <dev/raidframe/rf_driver.h>
-#include <dev/raidframe/rf_shutdown.h>
-#include <dev/raidframe/rf_kintf.h>
-
-#define RF_COPYBACK_DATA 0
-#define RF_COPYBACK_PARITY 1
-
-int rf_copyback_in_progress;
-
-static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status);
-static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status);
-static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ,
- RF_RaidAddr_t addr, RF_RowCol_t testRow,
- RF_RowCol_t testCol,
- RF_SectorNum_t testOffs);
-static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status);
-
-int
-rf_ConfigureCopyback(listp)
- RF_ShutdownList_t **listp;
-{
- rf_copyback_in_progress = 0;
- return (0);
-}
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#if defined(__NetBSD__)
-#include <sys/ioctl.h>
-#elif defined(__FreeBSD__)
-#include <sys/ioccom.h>
-#include <sys/filio.h>
-#endif
-#include <sys/fcntl.h>
-#include <sys/vnode.h>
-
-/* do a complete copyback */
-void
-rf_CopybackReconstructedData(raidPtr)
- RF_Raid_t *raidPtr;
-{
- RF_ComponentLabel_t *c_label;
- int done, retcode;
- RF_CopybackDesc_t *desc;
- RF_RowCol_t frow, fcol;
- RF_RaidDisk_t *badDisk;
- struct vnode *vp;
- char *databuf;
- int ac;
-
- RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *));
- if (c_label == NULL) {
- printf("rf_CopybackReconstructedData: Out of memory?\n");
- return;
- }
-
- done = 0;
- fcol = 0;
- for (frow = 0; frow < raidPtr->numRow; frow++) {
- for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
- if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared
- || raidPtr->Disks[frow][fcol].status == rf_ds_spared) {
- done = 1;
- break;
- }
- }
- if (done)
- break;
- }
-
- if (frow == raidPtr->numRow) {
- printf("COPYBACK: no disks need copyback\n");
- return;
- }
- badDisk = &raidPtr->Disks[frow][fcol];
-
- /* This device may have been opened successfully the first time. Close
- * it before trying to open it again.. */
-
- if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
- printf("Closed the open device: %s\n",
- raidPtr->Disks[frow][fcol].devname);
- vp = raidPtr->raid_cinfo[frow][fcol].ci_vp;
- ac = raidPtr->Disks[frow][fcol].auto_configured;
- rf_close_component(raidPtr, vp, ac);
- raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;
-
- }
- /* note that this disk was *not* auto_configured (any longer) */
- raidPtr->Disks[frow][fcol].auto_configured = 0;
-
- printf("About to (re-)open the device: %s\n",
- raidPtr->Disks[frow][fcol].devname);
-
- retcode = raid_getcomponentsize(raidPtr, frow, fcol);
-
- if (retcode) {
- printf("COPYBACK: raidlookup on device: %s failed: %d!\n",
- raidPtr->Disks[frow][fcol].devname, retcode);
-
- /* XXX the component isn't responding properly... must be
- * still dead :-( */
- return;
-
- }
-#if 0
- /* This is the way it was done before the CAM stuff was removed */
-
- if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
- printf("COPYBACK: unable to extract bus, target, lun from devname %s\n",
- badDisk->devname);
- return;
- }
- /* TUR the disk that's marked as bad to be sure that it's actually
- * alive */
- rf_SCSI_AllocTUR(&tur_op);
- retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
- rf_SCSI_FreeDiskOp(tur_op, 0);
-#endif
-
- if (retcode) {
- printf("COPYBACK: target disk failed TUR\n");
- return;
- }
- /* get a buffer to hold one SU */
- RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *));
-
- /* create a descriptor */
- RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
- desc->raidPtr = raidPtr;
- desc->status = 0;
- desc->frow = frow;
- desc->fcol = fcol;
- desc->spRow = badDisk->spareRow;
- desc->spCol = badDisk->spareCol;
- desc->stripeAddr = 0;
- desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
- desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol;
- desc->databuf = databuf;
- desc->mcpair = rf_AllocMCPair();
-
- printf("COPYBACK: Quiescing the array\n");
- /* quiesce the array, since we don't want to code support for user
- * accs here */
- rf_SuspendNewRequestsAndWait(raidPtr);
-
- /* adjust state of the array and of the disks */
- RF_LOCK_MUTEX(raidPtr->mutex);
- raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
- raidPtr->status[desc->frow] = rf_rs_optimal;
- rf_copyback_in_progress = 1; /* debug only */
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- printf("COPYBACK: Beginning\n");
- RF_GETTIME(desc->starttime);
- rf_ContinueCopyback(desc);
-
- /* Data has been restored. Fix up the component label. */
- /* Don't actually need the read here.. */
- raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev,
- raidPtr->raid_cinfo[frow][fcol].ci_vp,
- c_label);
-
- raid_init_component_label( raidPtr, c_label );
-
- c_label->row = frow;
- c_label->column = fcol;
- c_label->partitionSize = raidPtr->Disks[frow][fcol].partitionSize;
-
- raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev,
- raidPtr->raid_cinfo[frow][fcol].ci_vp,
- c_label);
- RF_Free(c_label, sizeof(RF_ComponentLabel_t));
-}
-
-
-/*
- * invoked via callback after a copyback I/O has completed to
- * continue on with the next one
- */
-void
-rf_ContinueCopyback(desc)
- RF_CopybackDesc_t *desc;
-{
- RF_SectorNum_t testOffs, stripeAddr;
- RF_Raid_t *raidPtr = desc->raidPtr;
- RF_RaidAddr_t addr;
- RF_RowCol_t testRow, testCol;
- int old_pctg, new_pctg, done;
- struct timeval t, diff;
-
- old_pctg = (-1);
- while (1) {
- stripeAddr = desc->stripeAddr;
- desc->raidPtr->copyback_stripes_done = stripeAddr
- / desc->sectPerStripe;
- if (rf_prReconSched) {
- old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
- }
- desc->stripeAddr += desc->sectPerStripe;
- if (rf_prReconSched) {
- new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
- if (new_pctg != old_pctg) {
- RF_GETTIME(t);
- RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
- printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec);
- }
- }
- if (stripeAddr >= raidPtr->totalSectors) {
- rf_CopybackComplete(desc, 0);
- return;
- }
- /* walk through the current stripe, su-by-su */
- for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) {
-
- /* map the SU, disallowing remap to spare space */
- (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
-
- if (testRow == desc->frow && testCol == desc->fcol) {
- rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs);
- done = 1;
- break;
- }
- }
-
- if (!done) {
- /* we didn't find the failed disk in the data part.
- * check parity. */
-
- /* map the parity for this stripe, disallowing remap
- * to spare space */
- (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
-
- if (testRow == desc->frow && testCol == desc->fcol) {
- rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs);
- }
- }
- /* check to see if the last read/write pair failed */
- if (desc->status) {
- rf_CopybackComplete(desc, 1);
- return;
- }
- /* we didn't find any units to copy back in this stripe.
- * Continue with the next one */
- }
-}
-
-
-/* copyback one unit */
-static void
-rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs)
- RF_CopybackDesc_t *desc;
- int typ;
- RF_RaidAddr_t addr;
- RF_RowCol_t testRow;
- RF_RowCol_t testCol;
- RF_SectorNum_t testOffs;
-{
- RF_SectorCount_t sectPerSU = desc->sectPerSU;
- RF_Raid_t *raidPtr = desc->raidPtr;
- RF_RowCol_t spRow = desc->spRow;
- RF_RowCol_t spCol = desc->spCol;
- RF_SectorNum_t spOffs;
-
- /* find the spare spare location for this SU */
- if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
- if (typ == RF_COPYBACK_DATA)
- raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
- else
- raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
- } else {
- spOffs = testOffs;
- }
-
- /* create reqs to read the old location & write the new */
- desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
- sectPerSU, desc->databuf, 0L, 0,
- (int (*) (void *, int)) rf_CopybackReadDoneProc, desc,
- NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
- desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
- sectPerSU, desc->databuf, 0L, 0,
- (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc,
- NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
- desc->frow = testRow;
- desc->fcol = testCol;
-
- /* enqueue the read. the write will go out as part of the callback on
- * the read. at user-level & in the kernel, wait for the read-write
- * pair to complete. in the simulator, just return, since everything
- * will happen as callbacks */
-
- RF_LOCK_MUTEX(desc->mcpair->mutex);
- desc->mcpair->flag = 0;
-
- rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY);
-
- while (!desc->mcpair->flag) {
- RF_WAIT_MCPAIR(desc->mcpair);
- }
- RF_UNLOCK_MUTEX(desc->mcpair->mutex);
- rf_FreeDiskQueueData(desc->readreq);
- rf_FreeDiskQueueData(desc->writereq);
-
-}
-
-
-/* called at interrupt context when the read has completed. just send out the write */
-static int
-rf_CopybackReadDoneProc(desc, status)
- RF_CopybackDesc_t *desc;
- int status;
-{
- if (status) { /* invoke the callback with bad status */
- printf("COPYBACK: copyback read failed. Aborting.\n");
- (desc->writereq->CompleteFunc) (desc, -100);
- } else {
- rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY);
- }
- return (0);
-}
-/* called at interrupt context when the write has completed.
- * at user level & in the kernel, wake up the copyback thread.
- * in the simulator, invoke the next copyback directly.
- * can't free diskqueuedata structs in the kernel b/c we're at interrupt context.
- */
-static int
-rf_CopybackWriteDoneProc(desc, status)
- RF_CopybackDesc_t *desc;
- int status;
-{
- if (status && status != -100) {
- printf("COPYBACK: copyback write failed. Aborting.\n");
- }
- desc->status = status;
- rf_MCPairWakeupFunc(desc->mcpair);
- return (0);
-}
-/* invoked when the copyback has completed */
-static void
-rf_CopybackComplete(desc, status)
- RF_CopybackDesc_t *desc;
- int status;
-{
- RF_Raid_t *raidPtr = desc->raidPtr;
- struct timeval t, diff;
-
- if (!status) {
- RF_LOCK_MUTEX(raidPtr->mutex);
- if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
- RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
- rf_FreeSpareTable(raidPtr);
- } else {
- raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare;
- }
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- RF_GETTIME(t);
- RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
- printf("Copyback time was %d.%06d seconds\n",
- (int) diff.tv_sec, (int) diff.tv_usec);
- } else
- printf("COPYBACK: Failure.\n");
-
- RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
- rf_FreeMCPair(desc->mcpair);
- RF_Free(desc, sizeof(*desc));
-
- rf_copyback_in_progress = 0;
- rf_ResumeNewRequests(raidPtr);
-}
diff --git a/sys/dev/raidframe/rf_copyback.h b/sys/dev/raidframe/rf_copyback.h
deleted file mode 100644
index 67da842..0000000
--- a/sys/dev/raidframe/rf_copyback.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_copyback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */
-/*
- * rf_copyback.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_COPYBACK_H_
-#define _RF__RF_COPYBACK_H_
-
-#include <dev/raidframe/rf_types.h>
-
-typedef struct RF_CopybackDesc_s {
- RF_Raid_t *raidPtr;
- RF_RowCol_t frow;
- RF_RowCol_t fcol;
- RF_RowCol_t spRow;
- RF_RowCol_t spCol;
- int status;
- RF_StripeNum_t stripeAddr;
- RF_SectorCount_t sectPerSU;
- RF_SectorCount_t sectPerStripe;
- char *databuf;
- RF_DiskQueueData_t *readreq;
- RF_DiskQueueData_t *writereq;
- struct timeval starttime;
- RF_MCPair_t *mcpair;
-} RF_CopybackDesc_t;
-
-extern int rf_copyback_in_progress;
-
-int rf_ConfigureCopyback(RF_ShutdownList_t ** listp);
-void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
-void rf_ContinueCopyback(RF_CopybackDesc_t * desc);
-
-#endif /* !_RF__RF_COPYBACK_H_ */
diff --git a/sys/dev/raidframe/rf_cvscan.c b/sys/dev/raidframe/rf_cvscan.c
deleted file mode 100644
index b7c1026..0000000
--- a/sys/dev/raidframe/rf_cvscan.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/* $NetBSD: rf_cvscan.c,v 1.5 1999/08/13 03:41:53 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*******************************************************************************
- *
- * cvscan.c -- prioritized cvscan disk queueing code.
- *
- * Nov 9, 1994, adapted from raidSim version (MCH)
- *
- ******************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_stripelocks.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_cvscan.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_general.h>
-
-/* Run CheckCvscanState() on a queue header, passing the call site's file and line. */
-#define DO_CHECK_STATE(_hdr_) CheckCvscanState((_hdr_), __FILE__, __LINE__)
-
-/*
- * True when p is RF_IO_NORMAL_PRIORITY or RF_IO_LOW_PRIORITY.
- * The argument is evaluated twice, so it must not have side effects.
- */
-#define pri_ok(p) ( ((p) == RF_IO_NORMAL_PRIORITY) || ((p) == RF_IO_LOW_PRIORITY))
-
-/*
- * Consistency check of a CVSCAN queue header; every condition is verified
- * with RF_ASSERT:
- *   - left list:   the head lies below cur_block, sectorOffsets never
- *                  increase along the list, every entry carries
- *                  nxt_priority, and the length equals left_cnt;
- *   - right list:  sectorOffsets never decrease, starting from cur_block,
- *                  every entry carries nxt_priority, and the length equals
- *                  right_cnt;
- *   - burner list: priorities never increase along the list and are all
- *                  strictly below nxt_priority.
- * The file and line arguments (supplied by DO_CHECK_STATE) are not
- * referenced directly in this body.
- */
-static void
-CheckCvscanState(RF_CvscanHeader_t * hdr, char *file, int line)
-{
- long i, key;
- RF_DiskQueueData_t *tmp;
-
- /* left list: head must be strictly below the current position */
- if (hdr->left != (RF_DiskQueueData_t *) NULL)
- RF_ASSERT(hdr->left->sectorOffset < hdr->cur_block);
- /* key trails one entry behind tmp, so each entry is compared with its predecessor */
- for (key = hdr->cur_block, i = 0, tmp = hdr->left;
- tmp != (RF_DiskQueueData_t *) NULL;
- key = tmp->sectorOffset, i++, tmp = tmp->next)
- RF_ASSERT(tmp->sectorOffset <= key
- && tmp->priority == hdr->nxt_priority && pri_ok(tmp->priority));
- RF_ASSERT(i == hdr->left_cnt);
-
- /* right list: same walk, with the ordering reversed */
- for (key = hdr->cur_block, i = 0, tmp = hdr->right;
- tmp != (RF_DiskQueueData_t *) NULL;
- key = tmp->sectorOffset, i++, tmp = tmp->next) {
- RF_ASSERT(key <= tmp->sectorOffset);
- RF_ASSERT(tmp->priority == hdr->nxt_priority);
- RF_ASSERT(pri_ok(tmp->priority));
- }
- RF_ASSERT(i == hdr->right_cnt);
-
- /* burner list: here key tracks the previous entry's priority, not its offset */
- for (key = hdr->nxt_priority - 1, tmp = hdr->burner;
- tmp != (RF_DiskQueueData_t *) NULL;
- key = tmp->priority, tmp = tmp->next) {
- RF_ASSERT(tmp);
- RF_ASSERT(hdr);
- RF_ASSERT(pri_ok(tmp->priority));
- RF_ASSERT(key >= tmp->priority);
- RF_ASSERT(tmp->priority < hdr->nxt_priority);
- }
-}
-
-
-
-/*
- * Link req into the list headed by *list_ptr, which is kept in order of
- * decreasing priority.  req is placed after every entry whose priority is
- * strictly greater than its own, i.e. in front of entries of equal priority.
- */
-static void
-PriorityInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req)
-{
-	/* link always addresses the pointer that would have to point at req */
-	RF_DiskQueueData_t **link = list_ptr;
-
-	while (*link != NULL && (*link)->priority > req->priority)
-		link = &(*link)->next;
-
-	req->next = *link;
-	*link = req;
-}
-
-
-
-/*
- * Link req into the sector-ordered list headed by *list_ptr.
- *   rf_cvscan_RIGHT: the list ascends; req goes after every entry whose
- *                    sectorOffset is <= its own.
- *   rf_cvscan_LEFT:  the list descends; req goes after every entry whose
- *                    sectorOffset is > its own.
- * Any other order value inserts req at the head.
- */
-static void
-ReqInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req, RF_CvscanArmDir_t order)
-{
-	RF_DiskQueueData_t **link = list_ptr;
-	RF_DiskQueueData_t *cur;
-	int advance;
-
-	while ((cur = *link) != NULL) {
-		if (order == rf_cvscan_RIGHT)
-			advance = (cur->sectorOffset <= req->sectorOffset);
-		else if (order == rf_cvscan_LEFT)
-			advance = (cur->sectorOffset > req->sectorOffset);
-		else
-			advance = 0;
-
-		if (!advance)
-			break;
-		link = &cur->next;
-	}
-
-	req->next = *link;
-	*link = req;
-}
-
-
-
-/*
- * Unlink and return the first entry of the list headed by *list_ptr, or
- * return NULL (leaving the list untouched) when the list is empty.  The
- * returned entry's next pointer is left as it was.
- */
-static RF_DiskQueueData_t *
-ReqDequeue(RF_DiskQueueData_t ** list_ptr)
-{
-	RF_DiskQueueData_t *head = *list_ptr;
-
-	if (head == NULL)
-		return (NULL);
-
-	*list_ptr = head->next;
-	return (head);
-}
-
-
-
-/*
- * After cur_block has moved, shift every request at the front of the right
- * list that now lies below cur_block over to the left list, keeping both
- * counters in step.
- */
-static void
-ReBalance(RF_CvscanHeader_t * hdr)
-{
-	RF_DiskQueueData_t *first;
-
-	for (;;) {
-		first = hdr->right;
-		if (first == NULL || !(first->sectorOffset < hdr->cur_block))
-			break;
-
-		/* pop from the right list ... */
-		hdr->right = first->next;
-		hdr->right_cnt--;
-		/* ... and file it on the left one */
-		hdr->left_cnt++;
-		ReqInsert(&hdr->left, first, rf_cvscan_LEFT);
-	}
-}
-
-
-
-/*
- * Move every entry of the list headed by *from_list_ptr into the
- * priority-ordered list headed by *to_list_ptr, then mark the source list
- * empty.
- */
-static void
-Transfer(RF_DiskQueueData_t ** to_list_ptr, RF_DiskQueueData_t ** from_list_ptr)
-{
-	RF_DiskQueueData_t *cur = *from_list_ptr;
-	RF_DiskQueueData_t *rest;
-
-	while (cur != NULL) {
-		/* PriorityInsert rewrites cur->next, so remember the tail first */
-		rest = cur->next;
-		PriorityInsert(to_list_ptr, cur);
-		cur = rest;
-	}
-	*from_list_ptr = NULL;
-}
-
-
-
-/*
- * Place req on the queue.
- *   - If both active lists are empty, req's priority becomes the active one.
- *   - Otherwise, if req outranks the active priority, everything on the
- *     left and right lists is moved to the burner and req's priority
- *     becomes the active one.
- *   - A request below the active priority is parked on the burner; any
- *     other request goes on the left or right list according to its
- *     position relative to cur_block.
- */
-static void
-RealEnqueue(RF_CvscanHeader_t * hdr, RF_DiskQueueData_t * req)
-{
-	int queue_idle;
-
-	RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || req->priority == RF_IO_LOW_PRIORITY);
-	DO_CHECK_STATE(hdr);
-
-	queue_idle = (hdr->left_cnt == 0 && hdr->right_cnt == 0);
-	if (queue_idle) {
-		hdr->nxt_priority = req->priority;
-	} else if (req->priority > hdr->nxt_priority) {
-		/* dump all other outstanding requests on the back burner */
-		Transfer(&hdr->burner, &hdr->left);
-		Transfer(&hdr->burner, &hdr->right);
-		hdr->left_cnt = 0;
-		hdr->right_cnt = 0;
-		hdr->nxt_priority = req->priority;
-	}
-
-	if (req->priority < hdr->nxt_priority) {
-		/* yet another low priority task */
-		PriorityInsert(&hdr->burner, req);
-	} else if (req->sectorOffset < hdr->cur_block) {
-		/* request lies to the left of the current arm position */
-		ReqInsert(&hdr->left, req, rf_cvscan_LEFT);
-		hdr->left_cnt++;
-	} else {
-		/* request lies at or to the right of the current arm position */
-		ReqInsert(&hdr->right, req, rf_cvscan_RIGHT);
-		hdr->right_cnt++;
-	}
-
-	DO_CHECK_STATE(hdr);
-}
-
-
-
-/*
- * Queue entry point: add elem to the CVSCAN queue q_in.  The priority
- * argument is not consulted; RealEnqueue() works from elem->priority.
- */
-void
-rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t * elem, int priority)
-{
-	RealEnqueue((RF_CvscanHeader_t *) q_in, elem);
-}
-
-
-
-/*
- * Remove and return the next request to service, or NULL when both active
- * lists are empty.
- *
- * A cost is computed for each side: the summed distance from cur_block to
- * the nearest `range' requests on that side, where range is
- * min(range_for_avg, left_cnt, right_cnt), plus range * change_penalty for
- * the side that would reverse the current direction.  The left side is
- * chosen when the right list is empty or the left cost is strictly
- * smaller; otherwise the right side is chosen.  cur_block then moves to the
- * end of the chosen request and the lists are rebalanced around it.
- *
- * When this empties both active lists while requests are parked on the
- * burner, the highest-priority batch of burner entries is re-queued.
- */
-RF_DiskQueueData_t *
-rf_CvscanDequeue(void *q_in)
-{
-	RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
-	long range, i, sum_dist_left, sum_dist_right;
-	RF_DiskQueueData_t *ret;
-	RF_DiskQueueData_t *tmp;
-
-	DO_CHECK_STATE(hdr);
-
-	if (hdr->left_cnt == 0 && hdr->right_cnt == 0)
-		return ((RF_DiskQueueData_t *) NULL);
-
-	range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt));
-	for (i = 0, tmp = hdr->left, sum_dist_left =
-	    ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0);
-	    tmp != (RF_DiskQueueData_t *) NULL && i < range;
-	    tmp = tmp->next, i++) {
-		sum_dist_left += hdr->cur_block - tmp->sectorOffset;
-	}
-	for (i = 0, tmp = hdr->right, sum_dist_right =
-	    ((hdr->direction == rf_cvscan_LEFT) ? range * hdr->change_penalty : 0);
-	    tmp != (RF_DiskQueueData_t *) NULL && i < range;
-	    tmp = tmp->next, i++) {
-		sum_dist_right += tmp->sectorOffset - hdr->cur_block;
-	}
-
-	if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right) {
-		hdr->direction = rf_cvscan_LEFT;
-		hdr->cur_block = hdr->left->sectorOffset + hdr->left->numSector;
-		hdr->left_cnt = RF_MAX(hdr->left_cnt - 1, 0);
-		ret = ReqDequeue(&hdr->left);
-	} else {
-		hdr->direction = rf_cvscan_RIGHT;
-		hdr->cur_block = hdr->right->sectorOffset + hdr->right->numSector;
-		hdr->right_cnt = RF_MAX(hdr->right_cnt - 1, 0);
-		ret = ReqDequeue(&hdr->right);
-	}
-	ReBalance(hdr);
-
-	if (hdr->left_cnt == 0 && hdr->right_cnt == 0
-	    && hdr->burner != (RF_DiskQueueData_t *) NULL) {
-		/*
-		 * Restore low priority requests for the next dequeue.
-		 *
-		 * The whole run of equal-priority entries is unhooked from
-		 * the burner list BEFORE any of them is re-queued.
-		 * RealEnqueue() runs DO_CHECK_STATE(), which requires every
-		 * burner entry to have a priority strictly below
-		 * nxt_priority; leaving the batch reachable through
-		 * hdr->burner while it is being re-queued violates that
-		 * check.
-		 */
-		RF_DiskQueueData_t *batch = hdr->burner;
-		RF_DiskQueueData_t *last = batch;
-
-		hdr->nxt_priority = batch->priority;
-		while (last->next != (RF_DiskQueueData_t *) NULL
-		    && last->next->priority == hdr->nxt_priority)
-			last = last->next;
-		hdr->burner = last->next;
-		last->next = (RF_DiskQueueData_t *) NULL;
-
-		while (batch != (RF_DiskQueueData_t *) NULL) {
-			/* RealEnqueue rewrites batch->next, so save it first */
-			RF_DiskQueueData_t *next = batch->next;
-
-			RealEnqueue(hdr, batch);
-			batch = next;
-		}
-	}
-	DO_CHECK_STATE(hdr);
-	return (ret);
-}
-
-
-
-/*
- * Return, without removing it, the head of the list chosen by the same
- * left/right cost comparison that rf_CvscanDequeue() performs, or NULL when
- * both active lists are empty.  The queue itself is not modified.
- */
-RF_DiskQueueData_t *
-rf_CvscanPeek(void *q_in)
-{
-	RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
-	RF_DiskQueueData_t *scan;
-	long span, n, cost_left, cost_right;
-
-	DO_CHECK_STATE(hdr);
-
-	if (hdr->left_cnt == 0 && hdr->right_cnt == 0)
-		return (NULL);
-
-	span = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt));
-
-	/* going left is penalised when the arm is currently heading right */
-	cost_left = (hdr->direction == rf_cvscan_RIGHT) ? span * hdr->change_penalty : 0;
-	scan = hdr->left;
-	for (n = 0; scan != NULL && n < span; n++) {
-		cost_left += hdr->cur_block - scan->sectorOffset;
-		scan = scan->next;
-	}
-
-	/* and vice versa */
-	cost_right = (hdr->direction == rf_cvscan_LEFT) ? span * hdr->change_penalty : 0;
-	scan = hdr->right;
-	for (n = 0; scan != NULL && n < span; n++) {
-		cost_right += scan->sectorOffset - hdr->cur_block;
-		scan = scan->next;
-	}
-
-	if (hdr->right_cnt == 0 || cost_left < cost_right)
-		return (hdr->left);
-	return (hdr->right);
-}
-
-
-
-/*
-** CVSCAN( 1, 0 ) is Shortest Seek Time First (SSTF)
-** lowest average response time
-** CVSCAN( 1, infinity ) is SCAN
-** lowest response time standard deviation
-*/
-
-
-/*
- * Module-level configuration hook for the CVSCAN queue; it has nothing to
- * set up and always returns 0.  The parameter list is spelled (void) so
- * that the definition is a prototype and matches the declaration in
- * rf_cvscan.h.
- */
-int
-rf_CvscanConfigure(void)
-{
-	return (0);
-}
-
-
-
-/*
- * Allocate (via RF_MallocAndAdd on clList) and initialise an empty CVSCAN
- * queue header, returned as an opaque pointer.  The averaging range is
- * fixed at 2 and the direction-change penalty at sectPerDisk / 5; the arm
- * starts at block 0 heading right.  listp is not used.
- */
-void *
-rf_CvscanCreate(RF_SectorCount_t sectPerDisk,
-    RF_AllocListElem_t * clList,
-    RF_ShutdownList_t ** listp)
-{
-	RF_CvscanHeader_t *hdr;
-	long avg_window = 2;	/* Currently no mechanism to change these */
-	long switch_cost = sectPerDisk / 5;
-
-	RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), clList);
-	bzero((char *) hdr, sizeof(RF_CvscanHeader_t));
-
-	/* tuning parameters, clamped to sane minimums */
-	hdr->range_for_avg = RF_MAX(avg_window, 1);
-	hdr->change_penalty = RF_MAX(switch_cost, 0);
-
-	/* arm position */
-	hdr->cur_block = 0;
-	hdr->direction = rf_cvscan_RIGHT;
-
-	/* all three lists start out empty */
-	hdr->left = (RF_DiskQueueData_t *) NULL;
-	hdr->right = (RF_DiskQueueData_t *) NULL;
-	hdr->burner = (RF_DiskQueueData_t *) NULL;
-	hdr->left_cnt = 0;
-	hdr->right_cnt = 0;
-
-	DO_CHECK_STATE(hdr);
-
-	return ((void *) hdr);
-}
-
-
-/*
- * NOTE(review): && binds tighter than || in #if expressions, so this guard
- * reads as  __NetBSD__ || (__FreeBSD__ && _KERNEL).  If the intent was
- * (__NetBSD__ || __FreeBSD__) && _KERNEL, it needs explicit parentheses --
- * confirm.
- */
-#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL)
-/* PrintCvscanQueue is not used, so we ignore it... */
-#else
-/*
- * Debug helper: print the queue parameters, the current position and
- * direction, and then one "(start,end,priority)" triple for every request
- * on the left, right and burner lists, where end is sectorOffset + numSector.
- */
-static void
-PrintCvscanQueue(RF_CvscanHeader_t * hdr)
-{
- RF_DiskQueueData_t *tmp;
-
- printf("CVSCAN(%d,%d) at %d going %s\n",
- (int) hdr->range_for_avg,
- (int) hdr->change_penalty,
- (int) hdr->cur_block,
- (hdr->direction == rf_cvscan_LEFT) ? "LEFT" : "RIGHT");
- printf("\tLeft(%d): ", hdr->left_cnt);
- for (tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
- printf("(%d,%ld,%d) ",
- (int) tmp->sectorOffset,
- (long) (tmp->sectorOffset + tmp->numSector),
- tmp->priority);
- printf("\n");
- printf("\tRight(%d): ", hdr->right_cnt);
- for (tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
- printf("(%d,%ld,%d) ",
- (int) tmp->sectorOffset,
- (long) (tmp->sectorOffset + tmp->numSector),
- tmp->priority);
- printf("\n");
- printf("\tBurner: ");
- for (tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
- printf("(%d,%ld,%d) ",
- (int) tmp->sectorOffset,
- (long) (tmp->sectorOffset + tmp->numSector),
- tmp->priority);
- printf("\n");
-}
-#endif
-
-
-/*
- * Promote to RF_IO_NORMAL_PRIORITY every burner entry whose parityStripeID
- * and which_ru both match the arguments, and re-queue each of them through
- * RealEnqueue().  Returns the number of entries promoted, which is asserted
- * to be 0 or 1.  The burner list is normally expected to hold at most one
- * entry, so the walk should be short.
- */
-int
-rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru)
-{
-	RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
-	RF_DiskQueueData_t **link;
-	RF_DiskQueueData_t *cur;
-	RF_DiskQueueData_t *promoted = NULL;
-	int retval = 0;
-
-	DO_CHECK_STATE(hdr);
-
-	/*
-	 * Single pass over the burner list.  link addresses the pointer that
-	 * refers to cur, so head and interior entries are unlinked the same
-	 * way.  Matches are pushed onto a temporary list.
-	 */
-	link = &hdr->burner;
-	while ((cur = *link) != NULL) {
-		if (cur->parityStripeID != parityStripeID || cur->which_ru != which_ru) {
-			link = &cur->next;
-			continue;
-		}
-		*link = cur->next;
-		cur->priority = RF_IO_NORMAL_PRIORITY;
-		cur->next = promoted;
-		promoted = cur;
-	}
-
-	/* hand the promoted entries back to the scheduler */
-	while (promoted != NULL) {
-		cur = promoted;
-		promoted = cur->next;
-		retval++;
-		RealEnqueue(hdr, cur);
-	}
-
-	RF_ASSERT(retval == 0 || retval == 1);
-	DO_CHECK_STATE(hdr);
-	return (retval);
-}
diff --git a/sys/dev/raidframe/rf_cvscan.h b/sys/dev/raidframe/rf_cvscan.h
deleted file mode 100644
index 7f536a8..0000000
--- a/sys/dev/raidframe/rf_cvscan.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_cvscan.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
-** Disk scheduling by CVSCAN( N, r )
-**
-** Given a set of requests, partition them into one set on each
-** side of the current arm position. The trick is to pick which
-** side you are going to service next; once a side is picked you will
-** service the closest request.
-** Let there be n1 requests on one side and n2 requests on the other
-** side. If one of n1 or n2 is zero, select the other side.
-** If both n1 and n2 are nonzero, select a "range" for examination
-** that is N' = min( n1, n2, N ). Average the distance from the
-** current position to the nearest N' requests on each side giving
-** d1 and d2.
-** Suppose the last decision was to move toward set 2, then the
-** current direction is toward set 2, and you will only switch to set
-** 1 if d1+R < d2 where R is r*(total number of cylinders), r in [0,1].
-**
-** I extend this by applying only to the set of requests that all
-** share the same, highest priority level.
-*/
-
-#ifndef _RF__RF_CVSCAN_H_
-#define _RF__RF_CVSCAN_H_
-
-#include <dev/raidframe/rf_diskqueue.h>
-
-/* Which side of the current position (cur_block) a request list or arm movement refers to. */
-typedef enum RF_CvscanArmDir_e {
- rf_cvscan_LEFT, /* requests whose sectorOffset is below cur_block */
- rf_cvscan_RIGHT /* requests whose sectorOffset is at or above cur_block */
-} RF_CvscanArmDir_t;
-
-/*
- * State of one CVSCAN queue.  The notes below describe how rf_cvscan.c
- * maintains each field.
- */
-typedef struct RF_CvscanHeader_s {
- long range_for_avg; /* CVSCAN param N */
- long change_penalty; /* CVSCAN param R */
- RF_CvscanArmDir_t direction; /* side chosen by the most recent dequeue */
- RF_SectorNum_t cur_block; /* sectorOffset + numSector of the last dequeued request */
- int nxt_priority; /* priority shared by every entry on left and right */
- RF_DiskQueueData_t *left; /* requests below cur_block, descending sectorOffset */
- int left_cnt; /* number of entries on left */
- RF_DiskQueueData_t *right; /* requests at or above cur_block, ascending sectorOffset */
- int right_cnt; /* number of entries on right */
- RF_DiskQueueData_t *burner; /* requests below nxt_priority, descending priority */
-} RF_CvscanHeader_t;
-
-int rf_CvscanConfigure(void);
-void *
-rf_CvscanCreate(RF_SectorCount_t sect_per_disk,
- RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp);
-void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority);
-RF_DiskQueueData_t *rf_CvscanDequeue(void *qptr);
-RF_DiskQueueData_t *rf_CvscanPeek(void *qptr);
-int
-rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru);
-
-#endif /* !_RF__RF_CVSCAN_H_ */
diff --git a/sys/dev/raidframe/rf_dag.h b/sys/dev/raidframe/rf_dag.h
deleted file mode 100644
index 15cd4a8..0000000
--- a/sys/dev/raidframe/rf_dag.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dag.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II, Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/****************************************************************************
- * *
- * dag.h -- header file for DAG-related data structures *
- * *
- ****************************************************************************/
-
-#ifndef _RF__RF_DAG_H_
-#define _RF__RF_DAG_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_stripelocks.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_dagflags.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_memchunk.h>
-
-#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */
-#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */
-#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may possess */
-
-#if defined(__FreeBSD__) && __FreeBSD_version > 500005
-#include <sys/bio.h>
-#endif
-#include <sys/buf.h>
-
-/*
- * One result-to-parameter binding: result number resultNum is propagated
- * into parameter number paramNum.  Bindings are chained through next.
- */
-struct RF_PropHeader_s { /* structure for propagation of results */
- int resultNum; /* bind result # resultNum */
- int paramNum; /* to parameter # paramNum */
- RF_PropHeader_t *next; /* linked list for multiple results/params */
-};
-
-/* Execution state of a DAG node; stored in the status field of RF_DagNode_s. */
-typedef enum RF_NodeStatus_e {
- rf_bwd1, /* node is ready for undo logging (backward
- * error recovery only) */
- rf_bwd2, /* node has completed undo logging (backward
- * error recovery only) */
- rf_wait, /* node is waiting to be executed */
- rf_fired, /* node is currently executing its do function */
- rf_good, /* node successfully completed execution of
- * its do function */
- rf_bad, /* node failed to successfully execute its do
- * function */
- rf_skipped, /* not used anymore, used to imply a node was
- * not executed */
- rf_recover, /* node is currently executing its undo
- * function */
- rf_panic, /* node failed to successfully execute its
- * undo function */
- rf_undone /* node successfully executed its undo
- * function */
-} RF_NodeStatus_t;
-/*
- * Kind of dependency an antecedent represents; one value per antecedent is
- * stored in the antType array of RF_DagNode_s.
- * These values were once used to control skipping a node.
- * Now they serve only as documentation.
- */
-typedef enum RF_AntecedentType_e {
- rf_trueData,
- rf_antiData,
- rf_outputData,
- rf_control
-} RF_AntecedentType_t;
-#define RF_DAG_PTRCACHESIZE 40 /* entries in the dag_ptrs array of RF_DagNode_s */
-#define RF_DAG_PARAMCACHESIZE 12 /* entries in the dag_params array of RF_DagNode_s */
-
-/* Type of the flags field of RF_DagNode_s; holds RF_DAGNODE_FLAG_* bits. */
-typedef RF_uint8 RF_DagNodeFlags_t;
-
-/*
- * One node of a DAG: its do/undo/wake functions, its links to antecedent
- * and succedent nodes, its parameters and results, and per-node execution
- * bookkeeping.
- * NOTE(review): several comments below refer to "*funcPtr", which is not a
- * field of this structure; they presumably mean doFunc/undoFunc -- confirm.
- */
-struct RF_DagNode_s {
- RF_NodeStatus_t status; /* current status of this node */
- int (*doFunc) (RF_DagNode_t *); /* normal function */
- int (*undoFunc) (RF_DagNode_t *); /* func to remove effect of
- * doFunc */
- int (*wakeFunc) (RF_DagNode_t *, int status); /* func called when the
- * node completes an I/O */
- int numParams; /* number of parameters required by *funcPtr */
- int numResults; /* number of results produced by *funcPtr */
- int numAntecedents; /* number of antecedents */
- int numAntDone; /* number of antecedents which have finished */
- int numSuccedents; /* number of succedents */
- int numSuccFired; /* incremented when a succedent is fired
- * during forward execution */
- int numSuccDone; /* incremented when a succedent finishes
- * during rollBackward */
- int commitNode; /* boolean flag - if true, this is a commit
- * node */
- RF_DagNode_t **succedents; /* succedents, array size
- * numSuccedents */
- RF_DagNode_t **antecedents; /* antecedents, array size
- * numAntecedents */
- RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each
- * antecedent */
- void **results; /* array of results produced by *funcPtr */
- RF_DagParam_t *params; /* array of parameters required by *funcPtr */
- RF_PropHeader_t **propList; /* propagation list, size
- * numSuccedents */
- RF_DagHeader_t *dagHdr; /* ptr to head of dag containing this node */
- void *dagFuncData; /* dag execution func uses this for whatever
- * it wants */
- RF_DagNode_t *next;
- int nodeNum; /* used by PrintDAG for debug only */
- int visited; /* used to avoid re-visiting nodes on DAG
- * walks */
- /* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY THAT AFTER
- * IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL */
- char *name; /* debug only */
- RF_DagNodeFlags_t flags;/* see below */
- RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance */
- RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance */
-};
-/*
- * Bit values for flags field of RF_DagNode_t
- */
-#define RF_DAGNODE_FLAG_NONE 0x00
-#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor
- * before firing this node */
-
-/*
- * Overall execution state of a DAG (status field of RF_DagHeader_s):
- * enable - DAG ready for normal execution, no errors encountered
- * rollForward - DAG encountered an error after commit point, rolling forward
- * rollBackward - DAG encountered an error prior to commit point, rolling backward
- */
-typedef enum RF_DagStatus_e {
- rf_enable,
- rf_rollForward,
- rf_rollBackward
-} RF_DagStatus_t;
-#define RF_MAX_HDR_SUCC 1 /* entries in the succedents array of RF_DagHeader_s */
-
-#define RF_MAXCHUNKS 10 /* entries in the memChunk array of RF_DagHeader_s */
-
-/*
- * Header of one DAG: its status, root node(s), resources to release when
- * the DAG is freed, completion callback, and retained memory chunks.
- * NOTE(review): the numSuccedents comment allows more than one root, but
- * the succedents array is sized by RF_MAX_HDR_SUCC, which is 1 -- confirm
- * how multi-root DAGs are meant to be stored.
- */
-struct RF_DagHeader_s {
- RF_DagStatus_t status; /* status of this DAG */
- int numSuccedents; /* DAG may be a tree, i.e. may have > 1 root */
- int numCommitNodes; /* number of commit nodes in graph */
- int numCommits; /* number of commit nodes which have been
- * fired */
- RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents,
- * size numSuccedents */
- RF_DagHeader_t *next; /* ptr to allow a list of dags */
- RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed
- * prior to freeing DAG */
- RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps
- * to be freed */
- int nodeNum; /* used by PrintDAG for debug only */
- int numNodesCompleted;
- RF_AccTraceEntry_t *tracerec; /* perf mon only */
-
- void (*cbFunc) (void *); /* function to call when the dag
- * completes */
- void *cbArg; /* argument for cbFunc */
- char *creator; /* name of function used to create this dag */
-
- RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG
- * is for */
- void *bp; /* the bp for this I/O passed down from the
- * file system. ignored outside kernel */
-
- RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of
- * memory to be retained upon
- * DAG free for re-use */
- int chunkIndex; /* the idea is to avoid calls to alloc and
- * free */
-
- RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows
- * SelectAlgorithm to merge memChunks
- * from several dags */
- int xtraChunkIndex; /* number of ptrs to valid chunks */
- int xtraChunkCnt; /* number of ptrs to chunks allocated */
-
-};
-
-/*
- * Progress counters and shared context for a list of DAGs (chained through
- * the next field of RF_DagHeader_s) that are fired one after another.
- */
-struct RF_DagList_s {
- /* common info for a list of dags which will be fired sequentially */
- int numDags; /* number of dags in the list */
- int numDagsFired; /* number of dags in list which have initiated
- * execution */
- int numDagsDone; /* number of dags in list which have completed
- * execution */
- RF_DagHeader_t *dags; /* list of dags */
- RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */
- RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user
- * info) */
-};
-/*
- * Resets a node so that it can be fired again: status returns to rf_wait,
- * the antecedent/succedent progress counters are zeroed and the next link
- * is cleared.  The argument is evaluated several times.
- * NOTE(review): this expands to a bare { } block rather than
- * do { } while (0), so "if (c) RF_ResetNode(n); else ..." will not compile.
- */
-#define RF_ResetNode(_n_) { \
- (_n_)->status = rf_wait; \
- (_n_)->numAntDone = 0; \
- (_n_)->numSuccFired = 0; \
- (_n_)->numSuccDone = 0; \
- (_n_)->next = NULL; \
-}
-
-/*
- * Resets the execution bookkeeping of a DAG header: completed-node and
- * commit counters go back to zero and status returns to rf_enable.  The
- * argument is evaluated several times.
- * NOTE(review): this expands to a bare { } block rather than
- * do { } while (0), so it cannot be used as the body of an if that has an
- * else branch.
- */
-#define RF_ResetDagHeader(_h_) { \
- (_h_)->numNodesCompleted = 0; \
- (_h_)->numCommits = 0; \
- (_h_)->status = rf_enable; \
-}
-
-/*
- * Convenience macro for declaring a create-DAG function: expands to the
- * declarator of a void function called _name_ with the standard six-argument
- * parameter list.  No trailing semicolon or body is supplied by the macro.
- */
-
-#define RF_CREATE_DAG_FUNC_DECL(_name_) \
-void _name_ ( \
- RF_Raid_t *raidPtr, \
- RF_AccessStripeMap_t *asmap, \
- RF_DagHeader_t *dag_h, \
- void *bp, \
- RF_RaidAccessFlags_t flags, \
- RF_AllocListElem_t *allocList)
-
-#endif /* !_RF__RF_DAG_H_ */
diff --git a/sys/dev/raidframe/rf_dagdegrd.c b/sys/dev/raidframe/rf_dagdegrd.c
deleted file mode 100644
index 6321db6..0000000
--- a/sys/dev/raidframe/rf_dagdegrd.c
+++ /dev/null
@@ -1,1132 +0,0 @@
-/* $NetBSD: rf_dagdegrd.c,v 1.7 2001/01/26 14:06:16 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_dagdegrd.c
- *
- * code for creating degraded read DAGs
- */
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_memchunk.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-
-
-/******************************************************************************
- *
- * General comments on DAG creation:
- *
- * All DAGs in this file use roll-away error recovery. Each DAG has a single
- * commit node, usually called "Cmt." If an error occurs before the Cmt node
- * is reached, the execution engine will halt forward execution and work
- * backward through the graph, executing the undo functions. Assuming that
- * each node in the graph prior to the Cmt node is undoable and atomic - or -
- * does not make changes to permanent state, the graph will fail atomically.
- * If an error occurs after the Cmt node executes, the engine will roll-forward
- * through the graph, blindly executing nodes until it reaches the end.
- * If a graph reaches the end, it is assumed to have completed successfully.
- *
- * A graph has only 1 Cmt node.
- *
- */
-
-
-/******************************************************************************
- *
- * The following wrappers map the standard DAG creation interface to the
- * DAG creation routines. Additionally, these wrappers enable experimentation
- * with new DAG structures by providing an extra level of indirection, allowing
- * the DAG creation routines to be replaced at this single point.
- *
- * rf_CreateRaidFiveDegradedReadDAG forwards its six arguments unchanged to
- * rf_CreateDegradedReadDAG and passes the address of rf_xorRecoveryFuncs as
- * the recovery-function argument.
- */
-
-void
-rf_CreateRaidFiveDegradedReadDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList)
-{
- rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
- &rf_xorRecoveryFuncs);
-}
-
-
-/******************************************************************************
- *
- * DAG creation code begins here
- */
-
-
-/******************************************************************************
- * Create a degraded read DAG for RAID level 1
- *
- * Hdr -> Nil -> R(p/s)d -> Commit -> Trm
- *
- * The "Rd" node reads data from the surviving disk in the mirror pair
- * Rpd - read of primary copy (chosen when asmap->numDataFailed == 0)
- * Rsd - read of secondary copy (chosen otherwise; the disk address is
- * taken from asmap->parityInfo)
- *
- * Parameters: raidPtr - description of the physical array
- * asmap - logical & physical addresses for this access
- * dag_h - DAG header that is filled in and linked to the new nodes
- * bp - buffer ptr (for holding write data); not referenced here
- * flags - general flags (e.g. disk locking); not referenced here
- * allocList - list of memory allocated in DAG creation
- *****************************************************************************/
-
-void
-rf_CreateRaidOneDegradedReadDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList)
-{
- RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
- RF_StripeNum_t parityStripeID;
- RF_ReconUnitNum_t which_ru;
- RF_PhysDiskAddr_t *pda;
- int useMirror, i;
-
- useMirror = 0;
- parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
- asmap->raidAddress, &which_ru);
- if (rf_dagDebug) {
- printf("[Creating RAID level 1 degraded read DAG]\n");
- }
- dag_h->creator = "RaidOneDegradedReadDAG";
- /* choose the copy to read: the primary unless a data unit has failed */
- if (asmap->numDataFailed == 0)
- useMirror = RF_FALSE;
- else
- useMirror = RF_TRUE;
-
- /* total number of nodes = 1 read node + (block + commit + terminator) */
- RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
- i = 0;
- rdNode = &nodes[i];
- i++;
- blockNode = &nodes[i];
- i++;
- commitNode = &nodes[i];
- i++;
- termNode = &nodes[i];
- i++;
-
- /* this dag can not commit until the commit node is reached. errors
- * prior to the commit point imply the dag has failed and must be
- * retried */
- dag_h->numCommitNodes = 1;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- /* initialize the block, commit, and terminator nodes; judging by the
- * asserts further down, the two integers after the NULL argument are
- * the successor and antecedent counts */
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
- NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-
- pda = asmap->physInfo;
- RF_ASSERT(pda != NULL);
- /* parityInfo must describe entire parity unit */
- RF_ASSERT(asmap->parityInfo->next == NULL);
-
- /* initialize the data node; both variants fill the same four params
- * (disk address, buffer, parity stripe ID, packed priority/which_ru
- * word) and differ only in the disk address and the node name */
- if (!useMirror) {
- /* read primary copy of data */
- rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
- rdNode->params[0].p = pda;
- rdNode->params[1].p = pda->bufPtr;
- rdNode->params[2].v = parityStripeID;
- rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- } else {
- /* read secondary copy of data: the disk address comes from
- * asmap->parityInfo, the target buffer is still pda->bufPtr */
- rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
- rdNode->params[0].p = asmap->parityInfo;
- rdNode->params[1].p = pda->bufPtr;
- rdNode->params[2].v = parityStripeID;
- rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
-
- /* connect header to block node */
- RF_ASSERT(dag_h->numSuccedents == 1);
- RF_ASSERT(blockNode->numAntecedents == 0);
- dag_h->succedents[0] = blockNode;
-
- /* connect block node to rdnode */
- RF_ASSERT(blockNode->numSuccedents == 1);
- RF_ASSERT(rdNode->numAntecedents == 1);
- blockNode->succedents[0] = rdNode;
- rdNode->antecedents[0] = blockNode;
- rdNode->antType[0] = rf_control;
-
- /* connect rdnode to commit node */
- RF_ASSERT(rdNode->numSuccedents == 1);
- RF_ASSERT(commitNode->numAntecedents == 1);
- rdNode->succedents[0] = commitNode;
- commitNode->antecedents[0] = rdNode;
- commitNode->antType[0] = rf_control;
-
- /* connect commit node to terminator */
- RF_ASSERT(commitNode->numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == 1);
- RF_ASSERT(termNode->numSuccedents == 0);
- commitNode->succedents[0] = termNode;
- termNode->antecedents[0] = commitNode;
- termNode->antType[0] = rf_control;
-}
-
-
-
-/******************************************************************************
- *
- * creates a DAG to perform a degraded-mode read of data within one stripe.
- * This DAG is as follows:
- *
- * Hdr -> Block -> Rud -> Xor -> Cmt -> T
- * -> Rrd ->
- * -> Rp -->
- *
- * Each R node is a successor of the Block node
- * Each R node has a single successor arc, which goes to X
- * There is one Rud for each chunk of surviving user data requested by the
- * user, and one Rrd for each chunk of surviving user data _not_ being read by
- * the user
- * R = read, ud = user data, rd = recovery (surviving) data, p = parity
- * X = XOR, C = Commit, T = terminate
- *
- * The block node guarantees a single source node.
- *
- * Note: The target buffer for the XOR node is set to the actual user buffer
- * where the failed data is supposed to end up. This buffer is zero'd by the
- * code here. Thus, if you create a degraded read dag, use it, and then
- * re-use, you have to be sure to zero the target buffer prior to the re-use.
- *
- * The recFunc argument at the end supplies the function (recFunc->simple)
- * and the node name (recFunc->SimpleName) used for the redundancy
- * recovery (Xor) node.
- *
- *****************************************************************************/
-
-void
-rf_CreateDegradedReadDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * recFunc)
-{
- RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode;
- RF_DagNode_t *commitNode, *rpNode, *termNode;
- int nNodes, nRrdNodes, nRudNodes, nXorBufs, i;
- int j, paramNum;
- RF_SectorCount_t sectorsPerSU;
- RF_ReconUnitNum_t which_ru;
- char *overlappingPDAs;/* a temporary array of flags, one per Rud node */
- RF_AccessStripeMapHeader_t *new_asm_h[2];
- RF_PhysDiskAddr_t *pda, *parityPDA;
- RF_StripeNum_t parityStripeID;
- RF_PhysDiskAddr_t *failedPDA;
- RF_RaidLayout_t *layoutPtr;
- char *rpBuf;
-
- layoutPtr = &(raidPtr->Layout);
- /* failedPDA points to the pda within the asm that targets the failed
- * disk */
- failedPDA = asmap->failedPDAs[0];
- parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
- asmap->raidAddress, &which_ru);
- sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
-
- if (rf_dagDebug) {
- printf("[Creating degraded read DAG]\n");
- }
- RF_ASSERT(asmap->numDataFailed == 1); /* exactly one failed data unit */
- dag_h->creator = "DegradedReadDAG";
-
- /*
- * generate two ASMs identifying the surviving data we need
- * in order to recover the lost data
- */
-
- /* overlappingPDAs array must be zero'd; it is allocated with RF_Calloc
- * (no allocList) and released explicitly with RF_Free further down */
- RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *));
- rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs,
- &rpBuf, overlappingPDAs, allocList);
-
- /*
- * create all the nodes at once
- *
- * -1 because no access is generated for the failed pda
- */
- nRudNodes = asmap->numStripeUnitsAccessed - 1;
- nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
- ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
- nNodes = 5 + nRudNodes + nRrdNodes; /* block, commit, xor, Rp, term,
- * plus the Rud and Rrd nodes */
- RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
- allocList);
- i = 0;
- blockNode = &nodes[i];
- i++;
- commitNode = &nodes[i];
- i++;
- xorNode = &nodes[i];
- i++;
- rpNode = &nodes[i];
- i++;
- termNode = &nodes[i];
- i++;
- rudNodes = &nodes[i];
- i += nRudNodes;
- rrdNodes = &nodes[i];
- i += nRrdNodes;
- RF_ASSERT(i == nNodes);
-
- /* initialize nodes */
- dag_h->numCommitNodes = 1;
- dag_h->numCommits = 0;
- /* this dag can not commit until the commit node is reached errors
- * prior to the commit point imply the dag has failed */
- dag_h->numSuccedents = 1;
-
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
- NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
- rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc,
- NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h,
- recFunc->SimpleName, allocList);
-
- /* fill in the Rud nodes: one per accessed pda other than failedPDA */
- for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) {
- if (pda == failedPDA) {
- i--; /* cancels the loop's i++ so rudNodes stays densely packed */
- continue;
- }
- rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
- rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
- "Rud", allocList);
- RF_ASSERT(pda);
- rudNodes[i].params[0].p = pda;
- rudNodes[i].params[1].p = pda->bufPtr;
- rudNodes[i].params[2].v = parityStripeID;
- rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
-
- /* fill in the Rrd nodes: first those from new_asm_h[0], then those
- * from new_asm_h[1] placed after them (index i + j) */
- i = 0;
- if (new_asm_h[0]) {
- for (pda = new_asm_h[0]->stripeMap->physInfo;
- i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
- i++, pda = pda->next) {
- rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
- rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
- dag_h, "Rrd", allocList);
- RF_ASSERT(pda);
- rrdNodes[i].params[0].p = pda;
- rrdNodes[i].params[1].p = pda->bufPtr;
- rrdNodes[i].params[2].v = parityStripeID;
- rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
- }
- if (new_asm_h[1]) {
- for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
- j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
- j++, pda = pda->next) {
- rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc,
- rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
- dag_h, "Rrd", allocList);
- RF_ASSERT(pda);
- rrdNodes[i + j].params[0].p = pda;
- rrdNodes[i + j].params[1].p = pda->bufPtr;
- rrdNodes[i + j].params[2].v = parityStripeID;
- rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
- }
- /* make a PDA for the parity unit: same row/col as asmap->parityInfo,
- * starting at the parity stripe unit's first sector plus the failed
- * access's offset within its stripe unit, and of the same length as
- * the failed access */
- RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- parityPDA->row = asmap->parityInfo->row;
- parityPDA->col = asmap->parityInfo->col;
- parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
- * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
- parityPDA->numSector = failedPDA->numSector;
-
- /* initialize the Rp node */
- rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList);
- rpNode->params[0].p = parityPDA;
- rpNode->params[1].p = rpBuf;
- rpNode->params[2].v = parityStripeID;
- rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-
- /*
- * the last and nastiest step is to assign all
- * the parameters of the Xor node: a (pda, buffer) pair per
- * buffer to be xored, followed by failedPDA and raidPtr
- */
- paramNum = 0;
- for (i = 0; i < nRrdNodes; i++) {
- /* all the Rrd nodes need to be xored together */
- xorNode->params[paramNum++] = rrdNodes[i].params[0];
- xorNode->params[paramNum++] = rrdNodes[i].params[1];
- }
- for (i = 0; i < nRudNodes; i++) {
- /* any Rud nodes that overlap the failed access need to be
- * xored in; a private copy of the pda is range-restricted
- * so the Rud node's own pda is left untouched */
- if (overlappingPDAs[i]) {
- RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- bcopy((char *) rudNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t));
- rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
- xorNode->params[paramNum++].p = pda;
- xorNode->params[paramNum++].p = pda->bufPtr;
- }
- }
- RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));
-
- /* install parity pda as last set of params to be xor'd */
- xorNode->params[paramNum++].p = parityPDA;
- xorNode->params[paramNum++].p = rpBuf;
-
- /*
- * the last 2 params to the recovery xor node are
- * the failed PDA and the raidPtr
- */
- xorNode->params[paramNum++].p = failedPDA;
- xorNode->params[paramNum++].p = raidPtr;
- RF_ASSERT(paramNum == 2 * nXorBufs + 2);
-
- /*
- * The xor node uses results[0] as the target buffer.
- * Set pointer and zero the buffer. In the kernel, this
- * may be a user buffer in which case we have to remap it.
- */
- xorNode->results[0] = failedPDA->bufPtr;
- RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr,
- failedPDA->numSector));
-
- /* connect nodes to form graph */
- /* connect the header to the block node */
- RF_ASSERT(dag_h->numSuccedents == 1);
- RF_ASSERT(blockNode->numAntecedents == 0);
- dag_h->succedents[0] = blockNode;
-
- /* connect the block node to the read nodes; successor slots are laid
- * out as [Rp, Rrd..., Rud...] */
- RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes));
- RF_ASSERT(rpNode->numAntecedents == 1);
- blockNode->succedents[0] = rpNode;
- rpNode->antecedents[0] = blockNode;
- rpNode->antType[0] = rf_control;
- for (i = 0; i < nRrdNodes; i++) {
- RF_ASSERT(rrdNodes[i].numSuccedents == 1);
- blockNode->succedents[1 + i] = &rrdNodes[i];
- rrdNodes[i].antecedents[0] = blockNode;
- rrdNodes[i].antType[0] = rf_control;
- }
- for (i = 0; i < nRudNodes; i++) {
- RF_ASSERT(rudNodes[i].numSuccedents == 1);
- blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i];
- rudNodes[i].antecedents[0] = blockNode;
- rudNodes[i].antType[0] = rf_control;
- }
-
- /* connect the read nodes to the xor node; antecedent slots use the
- * same [Rp, Rrd..., Rud...] layout and are all typed rf_trueData */
- RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes));
- RF_ASSERT(rpNode->numSuccedents == 1);
- rpNode->succedents[0] = xorNode;
- xorNode->antecedents[0] = rpNode;
- xorNode->antType[0] = rf_trueData;
- for (i = 0; i < nRrdNodes; i++) {
- RF_ASSERT(rrdNodes[i].numSuccedents == 1);
- rrdNodes[i].succedents[0] = xorNode;
- xorNode->antecedents[1 + i] = &rrdNodes[i];
- xorNode->antType[1 + i] = rf_trueData;
- }
- for (i = 0; i < nRudNodes; i++) {
- RF_ASSERT(rudNodes[i].numSuccedents == 1);
- rudNodes[i].succedents[0] = xorNode;
- xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i];
- xorNode->antType[1 + nRrdNodes + i] = rf_trueData;
- }
-
- /* connect the xor node to the commit node */
- RF_ASSERT(xorNode->numSuccedents == 1);
- RF_ASSERT(commitNode->numAntecedents == 1);
- xorNode->succedents[0] = commitNode;
- commitNode->antecedents[0] = xorNode;
- commitNode->antType[0] = rf_control;
-
- /* connect the commit node to the termNode */
- RF_ASSERT(commitNode->numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == 1);
- RF_ASSERT(termNode->numSuccedents == 0);
- commitNode->succedents[0] = termNode;
- termNode->antType[0] = rf_control;
- termNode->antecedents[0] = commitNode;
-}
-
-#if (RF_INCLUDE_CHAINDECLUSTER > 0)
-/******************************************************************************
- * Create a degraded read DAG for Chained Declustering
- *
- * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm
- *
- * The "Rd" node reads data from the surviving disk in the mirror pair
- * Rpd - read of primary copy (node name used when asmap->numDataFailed == 0;
- * the read may still be redirected to asmap->parityInfo when the
- * workload-shift check succeeds)
- * Rsd - read of secondary copy (used otherwise)
- *
- * Parameters: raidPtr - description of the physical array
- * asmap - logical & physical addresses for this access
- * dag_h - DAG header that is filled in and linked to the new nodes
- * bp - buffer ptr (for holding write data); not referenced here
- * flags - general flags (e.g. disk locking); not referenced here
- * allocList - list of memory allocated in DAG creation
- *****************************************************************************/
-
-void
-rf_CreateRaidCDegradedReadDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList)
-{
- RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
- RF_StripeNum_t parityStripeID;
- int useMirror, i, shiftable;
- RF_ReconUnitNum_t which_ru;
- RF_PhysDiskAddr_t *pda;
-
- if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { /* nothing failed */
- shiftable = RF_TRUE;
- } else {
- shiftable = RF_FALSE;
- }
- useMirror = 0;
- parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
- asmap->raidAddress, &which_ru);
-
- if (rf_dagDebug) {
- printf("[Creating RAID C degraded read DAG]\n");
- }
- dag_h->creator = "RaidCDegradedReadDAG";
- /* choose the copy to read: the primary unless a data unit has failed */
- if (asmap->numDataFailed == 0)
- useMirror = RF_FALSE;
- else
- useMirror = RF_TRUE;
-
- /* total number of nodes = 1 read node + (block + commit + terminator) */
- RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
- i = 0;
- rdNode = &nodes[i];
- i++;
- blockNode = &nodes[i];
- i++;
- commitNode = &nodes[i];
- i++;
- termNode = &nodes[i];
- i++;
-
- /*
- * This dag can not commit until the commit node is reached.
- * Errors prior to the commit point imply the dag has failed
- * and must be retried.
- */
- dag_h->numCommitNodes = 1;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- /* initialize the block, commit, and terminator nodes; judging by the
- * asserts further down, the two integers after the NULL argument are
- * the successor and antecedent counts */
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
- NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-
- pda = asmap->physInfo;
- RF_ASSERT(pda != NULL);
- /* parityInfo must describe entire parity unit */
- RF_ASSERT(asmap->parityInfo->next == NULL);
-
- /* initialize the data node; every variant fills the same four params
- * and differs only in the disk address placed in params[0] */
- if (!useMirror) {
- rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
- if (shiftable && rf_compute_workload_shift(raidPtr, pda)) {
- /* shift this read to the next disk in line: the address
- * comes from asmap->parityInfo, the buffer from pda */
- rdNode->params[0].p = asmap->parityInfo;
- rdNode->params[1].p = pda->bufPtr;
- rdNode->params[2].v = parityStripeID;
- rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- } else {
- /* read primary copy */
- rdNode->params[0].p = pda;
- rdNode->params[1].p = pda->bufPtr;
- rdNode->params[2].v = parityStripeID;
- rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
- } else {
- /* read secondary copy of data */
- rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
- rdNode->params[0].p = asmap->parityInfo;
- rdNode->params[1].p = pda->bufPtr;
- rdNode->params[2].v = parityStripeID;
- rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
-
- /* connect header to block node */
- RF_ASSERT(dag_h->numSuccedents == 1);
- RF_ASSERT(blockNode->numAntecedents == 0);
- dag_h->succedents[0] = blockNode;
-
- /* connect block node to rdnode */
- RF_ASSERT(blockNode->numSuccedents == 1);
- RF_ASSERT(rdNode->numAntecedents == 1);
- blockNode->succedents[0] = rdNode;
- rdNode->antecedents[0] = blockNode;
- rdNode->antType[0] = rf_control;
-
- /* connect rdnode to commit node */
- RF_ASSERT(rdNode->numSuccedents == 1);
- RF_ASSERT(commitNode->numAntecedents == 1);
- rdNode->succedents[0] = commitNode;
- commitNode->antecedents[0] = rdNode;
- commitNode->antType[0] = rf_control;
-
- /* connect commit node to terminator */
- RF_ASSERT(commitNode->numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == 1);
- RF_ASSERT(termNode->numSuccedents == 0);
- commitNode->succedents[0] = termNode;
- termNode->antecedents[0] = commitNode;
- termNode->antType[0] = rf_control;
-}
-#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */
-
-#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
-/*
- * XXX move this elsewhere?
- */
-void
-rf_DD_GenerateFailedAccessASMs(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_PhysDiskAddr_t ** pdap,
- int *nNodep,
- RF_PhysDiskAddr_t ** pqpdap,
- int *nPQNodep,
- RF_AllocListElem_t * allocList)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- int PDAPerDisk, i;
- RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
- int numDataCol = layoutPtr->numDataCol;
- int state;
- RF_SectorNum_t suoff, suend;
- unsigned firstDataCol, napdas, count;
- RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0;
- RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
- RF_PhysDiskAddr_t *pda_p;
- RF_PhysDiskAddr_t *phys_p;
- RF_RaidAddr_t sosAddr;
-
- /* determine how many pda's we will have to generate per unaccess
- * stripe. If there is only one failed data unit, it is one; if two,
- * possibly two, depending wether they overlap. */
-
- fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector);
- fone_end = fone_start + fone->numSector;
-
-#define CONS_PDA(if,start,num) \
- pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \
- pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \
- pda_p->numSector = num; \
- pda_p->next = NULL; \
- RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList)
-
- if (asmap->numDataFailed == 1) {
- PDAPerDisk = 1;
- state = 1;
- RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- pda_p = *pqpdap;
- /* build p */
- CONS_PDA(parityInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- /* build q */
- CONS_PDA(qInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_Q;
- } else {
- ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector);
- ftwo_end = ftwo_start + ftwo->numSector;
- if (fone->numSector + ftwo->numSector > secPerSU) {
- PDAPerDisk = 1;
- state = 2;
- RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- pda_p = *pqpdap;
- CONS_PDA(parityInfo, 0, secPerSU);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- CONS_PDA(qInfo, 0, secPerSU);
- pda_p->type = RF_PDA_TYPE_Q;
- } else {
- PDAPerDisk = 2;
- state = 3;
- /* four of them, fone, then ftwo */
- RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- pda_p = *pqpdap;
- CONS_PDA(parityInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- CONS_PDA(qInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_Q;
- pda_p++;
- CONS_PDA(parityInfo, ftwo_start, ftwo->numSector);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- CONS_PDA(qInfo, ftwo_start, ftwo->numSector);
- pda_p->type = RF_PDA_TYPE_Q;
- }
- }
- /* figure out number of nonaccessed pda */
- napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0));
- *nPQNodep = PDAPerDisk;
-
- /* sweep over the over accessed pda's, figuring out the number of
- * additional pda's to generate. Of course, skip the failed ones */
-
- count = 0;
- for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) {
- if ((pda_p == fone) || (pda_p == ftwo))
- continue;
- suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector);
- suend = suoff + pda_p->numSector;
- switch (state) {
- case 1: /* one failed PDA to overlap */
- /* if a PDA doesn't contain the failed unit, it can
- * only miss the start or end, not both */
- if ((suoff > fone_start) || (suend < fone_end))
- count++;
- break;
- case 2: /* whole stripe */
- if (suoff) /* leak at begining */
- count++;
- if (suend < numDataCol) /* leak at end */
- count++;
- break;
- case 3: /* two disjoint units */
- if ((suoff > fone_start) || (suend < fone_end))
- count++;
- if ((suoff > ftwo_start) || (suend < ftwo_end))
- count++;
- break;
- default:
- RF_PANIC();
- }
- }
-
- napdas += count;
- *nNodep = napdas;
- if (napdas == 0)
- return; /* short circuit */
-
- /* allocate up our list of pda's */
-
- RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- *pdap = pda_p;
-
- /* linkem together */
- for (i = 0; i < (napdas - 1); i++)
- pda_p[i].next = pda_p + (i + 1);
-
- /* march through the one's up to the first accessed disk */
- firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol;
- sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
- for (i = 0; i < firstDataCol; i++) {
- if ((pda_p - (*pdap)) == napdas)
- continue;
- pda_p->type = RF_PDA_TYPE_DATA;
- pda_p->raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- /* skip over dead disks */
- if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
- continue;
- switch (state) {
- case 1: /* fone */
- pda_p->numSector = fone->numSector;
- pda_p->raidAddress += fone_start;
- pda_p->startSector += fone_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- break;
- case 2: /* full stripe */
- pda_p->numSector = secPerSU;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
- break;
- case 3: /* two slabs */
- pda_p->numSector = fone->numSector;
- pda_p->raidAddress += fone_start;
- pda_p->startSector += fone_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- pda_p->type = RF_PDA_TYPE_DATA;
- pda_p->raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- pda_p->numSector = ftwo->numSector;
- pda_p->raidAddress += ftwo_start;
- pda_p->startSector += ftwo_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- break;
- default:
- RF_PANIC();
- }
- pda_p++;
- }
-
- /* march through the touched stripe units */
- for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) {
- if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1]))
- continue;
- suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector);
- suend = suoff + phys_p->numSector;
- switch (state) {
- case 1: /* single buffer */
- if (suoff > fone_start) {
- RF_ASSERT(suend >= fone_end);
- /* The data read starts after the mapped
- * access, snip off the begining */
- pda_p->numSector = suoff - fone_start;
- pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start;
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- }
- if (suend < fone_end) {
- RF_ASSERT(suoff <= fone_start);
- /* The data read stops before the end of the
- * failed access, extend */
- pda_p->numSector = fone_end - suend;
- pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- }
- break;
- case 2: /* whole stripe unit */
- RF_ASSERT((suoff == 0) || (suend == secPerSU));
- if (suend < secPerSU) { /* short read, snip from end
- * on */
- pda_p->numSector = secPerSU - suend;
- pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- } else
- if (suoff > 0) { /* short at front */
- pda_p->numSector = suoff;
- pda_p->raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- }
- break;
- case 3: /* two nonoverlapping failures */
- if ((suoff > fone_start) || (suend < fone_end)) {
- if (suoff > fone_start) {
- RF_ASSERT(suend >= fone_end);
- /* The data read starts after the
- * mapped access, snip off the
- * begining */
- pda_p->numSector = suoff - fone_start;
- pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start;
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- }
- if (suend < fone_end) {
- RF_ASSERT(suoff <= fone_start);
- /* The data read stops before the end
- * of the failed access, extend */
- pda_p->numSector = fone_end - suend;
- pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- }
- }
- if ((suoff > ftwo_start) || (suend < ftwo_end)) {
- if (suoff > ftwo_start) {
- RF_ASSERT(suend >= ftwo_end);
- /* The data read starts after the
- * mapped access, snip off the
- * begining */
- pda_p->numSector = suoff - ftwo_start;
- pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start;
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- }
- if (suend < ftwo_end) {
- RF_ASSERT(suoff <= ftwo_start);
- /* The data read stops before the end
- * of the failed access, extend */
- pda_p->numSector = ftwo_end - suend;
- pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- }
- }
- break;
- default:
- RF_PANIC();
- }
- }
-
- /* after the last accessed disk */
- for (; i < numDataCol; i++) {
- if ((pda_p - (*pdap)) == napdas)
- continue;
- pda_p->type = RF_PDA_TYPE_DATA;
- pda_p->raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- /* skip over dead disks */
- if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
- continue;
- switch (state) {
- case 1: /* fone */
- pda_p->numSector = fone->numSector;
- pda_p->raidAddress += fone_start;
- pda_p->startSector += fone_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- break;
- case 2: /* full stripe */
- pda_p->numSector = secPerSU;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
- break;
- case 3: /* two slabs */
- pda_p->numSector = fone->numSector;
- pda_p->raidAddress += fone_start;
- pda_p->startSector += fone_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- pda_p++;
- pda_p->type = RF_PDA_TYPE_DATA;
- pda_p->raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- pda_p->numSector = ftwo->numSector;
- pda_p->raidAddress += ftwo_start;
- pda_p->startSector += ftwo_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- break;
- default:
- RF_PANIC();
- }
- pda_p++;
- }
-
- RF_ASSERT(pda_p - *pdap == napdas);
- return;
-}
-#define INIT_DISK_NODE(node,name) \
-rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \
-(node)->succedents[0] = unblockNode; \
-(node)->succedents[1] = recoveryNode; \
-(node)->antecedents[0] = blockNode; \
-(node)->antType[0] = rf_control
-
-#define DISK_NODE_PARAMS(_node_,_p_) \
- (_node_).params[0].p = _p_ ; \
- (_node_).params[1].p = (_p_)->bufPtr; \
- (_node_).params[2].v = parityStripeID; \
- (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru)
-
-/*
- * rf_DoubleDegRead -- build the "double degraded read" DAG for an access
- * in which one or two data units have failed (asmap->numDataFailed).
- *
- * Shape of the graph built here:
- *
- *   hdr -> block -> { Rud, Rrd, Rp, Rq read nodes } -> unblock  -> term
- *                                                   \-> recovery -> term
- *
- * Every read node has the block node as its control antecedent and both
- * the unblock node and the recovery node as succedents (see
- * INIT_DISK_NODE).
- *
- * raidPtr               array descriptor; also handed to the recovery node
- * asmap                 access being mapped; also handed to the recovery node
- * dag_h                 DAG header to fill in
- * bp, flags             not referenced in this function
- * allocList             allocation list charged for nodes and buffers
- * redundantReadNodeName name given to the Rq read node(s)
- * recoveryNodeName      name given to the recovery node
- * recovFunc             function installed as the recovery node's action
- */
-void
-rf_DoubleDegRead(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList,
- char *redundantReadNodeName,
- char *recoveryNodeName,
- int (*recovFunc) (RF_DagNode_t *))
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode,
- *unblockNode, *rpNodes, *rqNodes, *termNode;
- RF_PhysDiskAddr_t *pda, *pqPDAs;
- RF_PhysDiskAddr_t *npdas;
- int nNodes, nRrdNodes, nRudNodes, i;
- RF_ReconUnitNum_t which_ru;
- int nReadNodes, nPQNodes;
- RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
- RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1];
- RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);
-
- if (rf_dagDebug)
- printf("[Creating Double Degraded Read DAG]\n");
- /* npdas/nRrdNodes: the extra data reads; pqPDAs/nPQNodes: the
- * redundancy reads (nPQNodes pairs, consumed below as Rp, Rq) */
- rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList);
-
- /* one Rud node per accessed stripe unit that has not failed */
- nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
- nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes;
- nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes;
-
- /* Carve all nodes out of one array.  The four groups of read nodes
- * (Rud, Rrd, Rp, Rq) are laid out back to back, so the code below can
- * address every read node as rudNodes + i for 0 <= i < nReadNodes. */
- RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
- i = 0;
- blockNode = &nodes[i];
- i += 1;
- unblockNode = &nodes[i];
- i += 1;
- recoveryNode = &nodes[i];
- i += 1;
- termNode = &nodes[i];
- i += 1;
- rudNodes = &nodes[i];
- i += nRudNodes;
- rrdNodes = &nodes[i];
- i += nRrdNodes;
- rpNodes = &nodes[i];
- i += nPQNodes;
- rqNodes = &nodes[i];
- i += nPQNodes;
- RF_ASSERT(i == nNodes);
-
- dag_h->numSuccedents = 1;
- dag_h->succedents[0] = blockNode;
- dag_h->creator = "DoubleDegRead";
- dag_h->numCommits = 0;
- dag_h->numCommitNodes = 1; /* unblock */
-
- /* the term node waits on both the unblock and the recovery node */
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList);
- termNode->antecedents[0] = unblockNode;
- termNode->antType[0] = rf_control;
- termNode->antecedents[1] = recoveryNode;
- termNode->antType[1] = rf_control;
-
- /* init the block and unblock nodes */
- /* The block node has all nodes except itself, unblock and recovery as
- * successors. Similarly for predecessors of the unblock. */
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList);
-
- /* relies on the read nodes being contiguous starting at rudNodes */
- for (i = 0; i < nReadNodes; i++) {
- blockNode->succedents[i] = rudNodes + i;
- unblockNode->antecedents[i] = rudNodes + i;
- unblockNode->antType[i] = rf_control;
- }
- unblockNode->succedents[0] = termNode;
-
- /* The recovery node has all the reads as predecessors, and the term
- * node as successors. It gets a pda as a param from each of the read
- * nodes plus the raidPtr. For each failed unit is has a result pda. */
- rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
- 1, /* successors */
- nReadNodes, /* preds */
- nReadNodes + 2, /* params */
- asmap->numDataFailed, /* results */
- dag_h, recoveryNodeName, allocList);
-
- recoveryNode->succedents[0] = termNode;
- for (i = 0; i < nReadNodes; i++) {
- recoveryNode->antecedents[i] = rudNodes + i;
- recoveryNode->antType[i] = rf_trueData;
- }
-
- /* build the read nodes, then come back and fill in recovery params
- * and results */
- /* Rud nodes: walk the access's own pda list, skipping the failed
- * pda(s).  "continue" still runs pda = pda->next, and i only advances
- * when a node was actually built. */
- pda = asmap->physInfo;
- for (i = 0; i < nRudNodes; pda = pda->next) {
- if ((pda == failedPDA) || (pda == failedPDAtwo))
- continue;
- INIT_DISK_NODE(rudNodes + i, "Rud");
- RF_ASSERT(pda);
- DISK_NODE_PARAMS(rudNodes[i], pda);
- i++;
- }
-
- /* Rrd nodes: one per entry of the linked list returned in npdas */
- pda = npdas;
- for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
- INIT_DISK_NODE(rrdNodes + i, "Rrd");
- RF_ASSERT(pda);
- DISK_NODE_PARAMS(rrdNodes[i], pda);
- }
-
- /* redundancy pdas */
- /* pqPDAs is walked as an array (pda++): consecutive entries feed
- * Rp[0], Rq[0] and, when nPQNodes == 2, Rp[1], Rq[1]. */
- pda = pqPDAs;
- INIT_DISK_NODE(rpNodes, "Rp");
- RF_ASSERT(pda);
- DISK_NODE_PARAMS(rpNodes[0], pda);
- pda++;
- INIT_DISK_NODE(rqNodes, redundantReadNodeName);
- RF_ASSERT(pda);
- DISK_NODE_PARAMS(rqNodes[0], pda);
- if (nPQNodes == 2) {
- pda++;
- INIT_DISK_NODE(rpNodes + 1, "Rp");
- RF_ASSERT(pda);
- DISK_NODE_PARAMS(rpNodes[1], pda);
- pda++;
- INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName);
- RF_ASSERT(pda);
- DISK_NODE_PARAMS(rqNodes[1], pda);
- }
- /* fill in recovery node params */
- /* one pda per read node, then (i == nReadNodes after the loop) the
- * raidPtr and the asmap in the two trailing slots */
- for (i = 0; i < nReadNodes; i++)
- recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */
- recoveryNode->params[i++].p = (void *) raidPtr;
- recoveryNode->params[i++].p = (void *) asmap;
- recoveryNode->results[0] = failedPDA;
- if (asmap->numDataFailed == 2)
- recoveryNode->results[1] = failedPDAtwo;
-
- /* zero fill the target data buffers? */
-}
-
-#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
diff --git a/sys/dev/raidframe/rf_dagdegrd.h b/sys/dev/raidframe/rf_dagdegrd.h
deleted file mode 100644
index 2e899d8..0000000
--- a/sys/dev/raidframe/rf_dagdegrd.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dagdegrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_DAGDEGRD_H_
-#define _RF__RF_DAGDEGRD_H_
-
-#include <dev/raidframe/rf_types.h>
-
-/*
- * degraded read DAG creation routines
- *
- * Each routine receives the array descriptor (raidPtr), the access stripe
- * map being serviced (asmap), the DAG header to populate (dag_h) and the
- * allocation list (allocList).
- */
-void
-rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList);
-void
-rf_CreateRaidOneDegradedReadDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList);
-void
-rf_CreateDegradedReadDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * recFunc);
-void
-rf_CreateRaidCDegradedReadDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList);
-/*
- * Outputs: *pdap / *nNodep receive a linked list of extra data pdas and
- * its length; *pqpdap / *nPQNodep receive an array of redundancy pdas and
- * the number of pairs in it (rf_DoubleDegRead consumes 2 * nPQNodes
- * entries).  Buffers are charged to allocList.
- */
-void
-rf_DD_GenerateFailedAccessASMs(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap,
- int *nNodep, RF_PhysDiskAddr_t ** pqpdap, int *nPQNodep,
- RF_AllocListElem_t * allocList);
-/*
- * Builds the double degraded read DAG.  redundantReadNodeName and
- * recoveryNodeName name the Rq read node(s) and the recovery node;
- * recovFunc is installed as the recovery node's function.
- */
-void
-rf_DoubleDegRead(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList, char *redundantReadNodeName,
- char *recoveryNodeName, int (*recovFunc) (RF_DagNode_t *));
-
-#endif /* !_RF__RF_DAGDEGRD_H_ */
diff --git a/sys/dev/raidframe/rf_dagdegwr.c b/sys/dev/raidframe/rf_dagdegwr.c
deleted file mode 100644
index 70e0db6..0000000
--- a/sys/dev/raidframe/rf_dagdegwr.c
+++ /dev/null
@@ -1,846 +0,0 @@
-/* $NetBSD: rf_dagdegwr.c,v 1.6 2001/01/26 04:05:08 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_dagdegwr.c
- *
- * code for creating degraded write DAGs
- *
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_memchunk.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-
-
-/******************************************************************************
- *
- * General comments on DAG creation:
- *
- * All DAGs in this file use roll-away error recovery. Each DAG has a single
- * commit node, usually called "Cmt." If an error occurs before the Cmt node
- * is reached, the execution engine will halt forward execution and work
- * backward through the graph, executing the undo functions. Assuming that
- * each node in the graph prior to the Cmt node is undoable and atomic - or -
- * does not make changes to permanent state, the graph will fail atomically.
- * If an error occurs after the Cmt node executes, the engine will roll-forward
- * through the graph, blindly executing nodes until it reaches the end.
- * If a graph reaches the end, it is assumed to have completed successfully.
- *
- * A graph has only 1 Cmt node.
- *
- */
-
-
-/******************************************************************************
- *
- * The following wrappers map the standard DAG creation interface to the
- * DAG creation routines. Additionally, these wrappers enable experimentation
- * with new DAG structures by providing an extra level of indirection, allowing
- * the DAG creation routines to be replaced at this single point.
- */
-
-/*
- * Wrapper with the standard DAG-creation signature (supplied by
- * RF_CREATE_DAG_FUNC_DECL).  Forwards to the common generator with
- * nfaults = 1, rf_RecoveryXorFunc as the redundancy function and buffer
- * recycling enabled (RF_TRUE).
- */
-static
-RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG)
-{
- rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp,
- flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE);
-}
-
-/*
- * rf_CreateDegradedWriteDAG -- entry point for a write in which exactly one
- * data unit has failed (asserted below).  After checking its preconditions
- * it delegates to rf_CreateSimpleDegradedWriteDAG.
- *
- * Note that the callee's generator assigns dag_h->creator again
- * ("SimpleDegradedWriteDAG"), so the value set here does not survive the
- * call.
- */
-void
-rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList)
- RF_Raid_t *raidPtr;
- RF_AccessStripeMap_t *asmap;
- RF_DagHeader_t *dag_h;
- void *bp;
- RF_RaidAccessFlags_t flags;
- RF_AllocListElem_t *allocList;
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
-
- RF_ASSERT(asmap->numDataFailed == 1);
- dag_h->creator = "DegradedWriteDAG";
-
- /* if the access writes only a portion of the failed unit, and also
- * writes some portion of at least one surviving unit, we create two
- * DAGs, one for the failed component and one for the non-failed
- * component, and do them sequentially. Note that the fact that we're
- * accessing only a portion of the failed unit indicates that the
- * access either starts or ends in the failed unit, and hence we need
- * create only two dags. This is inefficient in that the same data or
- * parity can get read and written twice using this structure. I need
- * to fix this to do the access all at once. */
- /* NOTE(review): no splitting into two DAGs happens in this function;
- * the assertion below only rejects that case (more than one stripe
- * unit accessed together with a partial write of the failed unit).
- * Presumably the split is done by the caller -- confirm. */
- RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit));
- rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList);
-}
-
-
-
-/******************************************************************************
- *
- * DAG creation code begins here
- */
-
-
-
-/******************************************************************************
- *
- * CommonCreateSimpleDegradedWriteDAG -- creates a DAG to do a degraded-mode
- * write, which is as follows
- *
- * / {Wnq} --\
- * hdr -> blockNode -> Rod -> Xor -> Cmt -> Wnp ----> unblock -> term
- * \ {Rod} / \ Wnd ---/
- * \ {Wnd} -/
- *
- * commit node: Cmt
- *
- * IMPORTANT:
- * This DAG generator does not work for double-degraded archs since it does not
- * generate Q
- *
- * This dag is essentially identical to the large-write dag, except that the
- * write to the failed data unit is suppressed.
- *
- * IMPORTANT: this dag does not work in the case where the access writes only
- * a portion of the failed unit, and also writes some portion of at least one
- * surviving SU. this case is handled in CreateDegradedWriteDAG above.
- *
- * The block & unblock nodes are leftovers from a previous version. They
- * do nothing, but I haven't deleted them because it would be a tremendous
- * effort to put them back in.
- *
- * This dag is used whenever one of the data units in a write has failed.
- * If it is the parity unit that failed, the nonredundant write dag (below)
- * is used.
- *****************************************************************************/
-
-/*
- * Parameters:
- *   raidPtr, asmap, dag_h, allocList  the usual DAG-creation arguments
- *   bp, flags           not referenced in this function
- *   nfaults             number of redundancy units written: a Wnp node is
- *                       always built, a Wnq node only when nfaults == 2
- *   redFunc             function installed as the xor ("Xrc") node's action
- *   allowBufferRecycle  when non-zero, a read buffer from the second
- *                       recovery ASM whose sector count matches the failed
- *                       pda may be reused as the XOR target buffer
- */
-void
-rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
- allocList, nfaults, redFunc, allowBufferRecycle)
- RF_Raid_t *raidPtr;
- RF_AccessStripeMap_t *asmap;
- RF_DagHeader_t *dag_h;
- void *bp;
- RF_RaidAccessFlags_t flags;
- RF_AllocListElem_t *allocList;
- int nfaults;
- int (*redFunc) (RF_DagNode_t *);
- int allowBufferRecycle;
-{
- int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum,
- rdnodesFaked;
- RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode;
- RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode;
- RF_SectorCount_t sectorsPerSU;
- RF_ReconUnitNum_t which_ru;
- char *xorTargetBuf = NULL; /* the target buffer for the XOR
- * operation */
- char *overlappingPDAs;/* a temporary array of flags */
- RF_AccessStripeMapHeader_t *new_asm_h[2];
- RF_PhysDiskAddr_t *pda, *parityPDA;
- RF_StripeNum_t parityStripeID;
- RF_PhysDiskAddr_t *failedPDA;
- RF_RaidLayout_t *layoutPtr;
-
- layoutPtr = &(raidPtr->Layout);
- parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress,
- &which_ru);
- sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
- /* failedPDA points to the pda within the asm that targets the failed
- * disk */
- failedPDA = asmap->failedPDAs[0];
-
- if (rf_dagDebug)
- printf("[Creating degraded-write DAG]\n");
-
- RF_ASSERT(asmap->numDataFailed == 1);
- dag_h->creator = "SimpleDegradedWriteDAG";
-
- /*
- * Generate two ASMs identifying the surviving data
- * we need in order to recover the lost data.
- */
- /* overlappingPDAs array must be zero'd */
- /* (allocated without the alloc list; released with RF_Free once the
- * xor params have been filled in below) */
- RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *));
- rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h,
- &nXorBufs, NULL, overlappingPDAs, allocList);
-
- /* create all the nodes at once */
- nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is
- * generated for the
- * failed pda */
-
- nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
- ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
- /*
- * XXX
- *
- * There's a bug with a complete stripe overwrite- that means 0 reads
- * of old data, and the rest of the DAG generation code doesn't like
- * that. A release is coming, and I don't wanna risk breaking a critical
- * DAG generator, so here's what I'm gonna do- if there's no read nodes,
- * I'm gonna fake there being a read node, and I'm gonna swap in a
- * no-op node in its place (to make all the link-up code happy).
- * This should be fixed at some point. --jimz
- */
- if (nRrdNodes == 0) {
- nRrdNodes = 1;
- rdnodesFaked = 1;
- } else {
- rdnodesFaked = 0;
- }
- /* 5 fixed nodes (block, commit, unblock, term, xor) + nfaults
- * redundancy writes (Wnp, and Wnq when nfaults == 2) + Wnd + Rrd */
- nNodes = 5 + nfaults + nWndNodes + nRrdNodes;
- RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t),
- (RF_DagNode_t *), allocList);
- i = 0;
- blockNode = &nodes[i];
- i += 1;
- commitNode = &nodes[i];
- i += 1;
- unblockNode = &nodes[i];
- i += 1;
- termNode = &nodes[i];
- i += 1;
- xorNode = &nodes[i];
- i += 1;
- wnpNode = &nodes[i];
- i += 1;
- wndNodes = &nodes[i];
- i += nWndNodes;
- rrdNodes = &nodes[i];
- i += nRrdNodes;
- if (nfaults == 2) {
- wnqNode = &nodes[i];
- i += 1;
- } else {
- wnqNode = NULL;
- }
- RF_ASSERT(i == nNodes);
-
- /* this dag can not commit until all rrd and xor Nodes have completed */
- dag_h->numCommitNodes = 1;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- RF_ASSERT(nRrdNodes > 0);
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList);
- rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
- NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
- rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1,
- nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", allocList);
-
- /*
- * Fill in the Rrd nodes. If any of the rrd buffers are the same size as
- * the failed buffer, save a pointer to it so we can use it as the target
- * of the XOR. The pdas in the rrd nodes have been range-restricted, so if
- * a buffer is the same size as the failed buffer, it must also be at the
- * same alignment within the SU.
- */
- i = 0;
- if (new_asm_h[0]) {
- for (i = 0, pda = new_asm_h[0]->stripeMap->physInfo;
- i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
- i++, pda = pda->next) {
- rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
- RF_ASSERT(pda);
- rrdNodes[i].params[0].p = pda;
- rrdNodes[i].params[1].p = pda->bufPtr;
- rrdNodes[i].params[2].v = parityStripeID;
- rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
- }
- /* i now equals the number of stripe units accessed in new_asm_h[0] */
- /* note: only buffers from new_asm_h[1] are considered for recycling
- * as the XOR target; the loop above performs no such check */
- if (new_asm_h[1]) {
- for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
- j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
- j++, pda = pda->next) {
- rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
- RF_ASSERT(pda);
- rrdNodes[i + j].params[0].p = pda;
- rrdNodes[i + j].params[1].p = pda->bufPtr;
- rrdNodes[i + j].params[2].v = parityStripeID;
- rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- if (allowBufferRecycle && (pda->numSector == failedPDA->numSector))
- xorTargetBuf = pda->bufPtr;
- }
- }
- if (rdnodesFaked) {
- /*
- * This is where we'll init that fake noop read node
- * (XXX should the wakeup func be different?)
- */
- rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, 1, 1, 0, 0, dag_h, "RrN", allocList);
- }
- /*
- * Make a PDA for the parity unit. The parity PDA should start at
- * the same offset into the SU as the failed PDA.
- */
- /* Danner comment: I don't think this copy is really necessary. We are
- * in one of two cases here. (1) The entire failed unit is written.
- * Then asmap->parityInfo will describe the entire parity. (2) We are
- * only writing a subset of the failed unit and nothing else. Then the
- * asmap->parityInfo describes the failed unit and the copy can also
- * be avoided. */
-
- RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- parityPDA->row = asmap->parityInfo->row;
- parityPDA->col = asmap->parityInfo->col;
- /* start of the parity SU plus the failed pda's offset within its SU */
- parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
- * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
- parityPDA->numSector = failedPDA->numSector;
-
- /* no recyclable read buffer was found: allocate a zeroed target */
- if (!xorTargetBuf) {
- RF_CallocAndAdd(xorTargetBuf, 1,
- rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
- }
- /* init the Wnp node */
- rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList);
- wnpNode->params[0].p = parityPDA;
- wnpNode->params[1].p = xorTargetBuf;
- wnpNode->params[2].v = parityStripeID;
- wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-
- /* fill in the Wnq Node */
- /* parityPDA is reused as the variable for a freshly allocated Q pda;
- * the Q data buffer is the xor node's second result */
- if (nfaults == 2) {
- {
- RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
- (RF_PhysDiskAddr_t *), allocList);
- parityPDA->row = asmap->qInfo->row;
- parityPDA->col = asmap->qInfo->col;
- parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU)
- * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
- parityPDA->numSector = failedPDA->numSector;
-
- rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList);
- wnqNode->params[0].p = parityPDA;
- RF_CallocAndAdd(xorNode->results[1], 1,
- rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
- wnqNode->params[1].p = xorNode->results[1];
- wnqNode->params[2].v = parityStripeID;
- wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
- }
- /* fill in the Wnd nodes */
- /* the failed pda gets no Wnd node: i-- cancels the loop's i++ so the
- * node index stays put while pda still advances */
- for (pda = asmap->physInfo, i = 0; i < nWndNodes; i++, pda = pda->next) {
- if (pda == failedPDA) {
- i--;
- continue;
- }
- rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
- RF_ASSERT(pda);
- wndNodes[i].params[0].p = pda;
- wndNodes[i].params[1].p = pda->bufPtr;
- wndNodes[i].params[2].v = parityStripeID;
- wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
-
- /* fill in the results of the xor node */
- xorNode->results[0] = xorTargetBuf;
-
- /* fill in the params of the xor node */
- /* params are (pda, buffer) pairs; a faked read node contributes none */
-
- paramNum = 0;
- if (rdnodesFaked == 0) {
- for (i = 0; i < nRrdNodes; i++) {
- /* all the Rrd nodes need to be xored together */
- xorNode->params[paramNum++] = rrdNodes[i].params[0];
- xorNode->params[paramNum++] = rrdNodes[i].params[1];
- }
- }
- for (i = 0; i < nWndNodes; i++) {
- /* any Wnd nodes that overlap the failed access need to be
- * xored in */
- /* a private copy of the pda is range-restricted against the
- * failed pda so the Wnd node's own pda is left untouched */
- if (overlappingPDAs[i]) {
- RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- bcopy((char *) wndNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t));
- rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
- xorNode->params[paramNum++].p = pda;
- xorNode->params[paramNum++].p = pda->bufPtr;
- }
- }
- RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));
-
- /*
- * Install the failed PDA into the xor param list so that the
- * new data gets xor'd in.
- */
- xorNode->params[paramNum++].p = failedPDA;
- xorNode->params[paramNum++].p = failedPDA->bufPtr;
-
- /*
- * The last 2 params to the recovery xor node are always the failed
- * PDA and the raidPtr. install the failedPDA even though we have just
- * done so above. This allows us to use the same XOR function for both
- * degraded reads and degraded writes.
- */
- xorNode->params[paramNum++].p = failedPDA;
- xorNode->params[paramNum++].p = raidPtr;
- RF_ASSERT(paramNum == 2 * nXorBufs + 2);
-
- /*
- * Code to link nodes begins here
- */
-
- /* link header to block node */
- RF_ASSERT(blockNode->numAntecedents == 0);
- dag_h->succedents[0] = blockNode;
-
- /* link block node to rd nodes */
- RF_ASSERT(blockNode->numSuccedents == nRrdNodes);
- for (i = 0; i < nRrdNodes; i++) {
- RF_ASSERT(rrdNodes[i].numAntecedents == 1);
- blockNode->succedents[i] = &rrdNodes[i];
- rrdNodes[i].antecedents[0] = blockNode;
- rrdNodes[i].antType[0] = rf_control;
- }
-
- /* link read nodes to xor node */
- RF_ASSERT(xorNode->numAntecedents == nRrdNodes);
- for (i = 0; i < nRrdNodes; i++) {
- RF_ASSERT(rrdNodes[i].numSuccedents == 1);
- rrdNodes[i].succedents[0] = xorNode;
- xorNode->antecedents[i] = &rrdNodes[i];
- xorNode->antType[i] = rf_trueData;
- }
-
- /* link xor node to commit node */
- RF_ASSERT(xorNode->numSuccedents == 1);
- RF_ASSERT(commitNode->numAntecedents == 1);
- xorNode->succedents[0] = commitNode;
- commitNode->antecedents[0] = xorNode;
- commitNode->antType[0] = rf_control;
-
- /* link commit node to wnd nodes */
- RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes);
- for (i = 0; i < nWndNodes; i++) {
- RF_ASSERT(wndNodes[i].numAntecedents == 1);
- commitNode->succedents[i] = &wndNodes[i];
- wndNodes[i].antecedents[0] = commitNode;
- wndNodes[i].antType[0] = rf_control;
- }
-
- /* link the commit node to wnp, wnq nodes */
- RF_ASSERT(wnpNode->numAntecedents == 1);
- commitNode->succedents[nWndNodes] = wnpNode;
- wnpNode->antecedents[0] = commitNode;
- wnpNode->antType[0] = rf_control;
- if (nfaults == 2) {
- RF_ASSERT(wnqNode->numAntecedents == 1);
- commitNode->succedents[nWndNodes + 1] = wnqNode;
- wnqNode->antecedents[0] = commitNode;
- wnqNode->antType[0] = rf_control;
- }
- /* link write new data nodes to unblock node */
- RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults));
- for (i = 0; i < nWndNodes; i++) {
- RF_ASSERT(wndNodes[i].numSuccedents == 1);
- wndNodes[i].succedents[0] = unblockNode;
- unblockNode->antecedents[i] = &wndNodes[i];
- unblockNode->antType[i] = rf_control;
- }
-
- /* link write new parity node to unblock node */
- RF_ASSERT(wnpNode->numSuccedents == 1);
- wnpNode->succedents[0] = unblockNode;
- unblockNode->antecedents[nWndNodes] = wnpNode;
- unblockNode->antType[nWndNodes] = rf_control;
-
- /* link write new q node to unblock node */
- if (nfaults == 2) {
- RF_ASSERT(wnqNode->numSuccedents == 1);
- wnqNode->succedents[0] = unblockNode;
- unblockNode->antecedents[nWndNodes + 1] = wnqNode;
- unblockNode->antType[nWndNodes + 1] = rf_control;
- }
- /* link unblock node to term node */
- RF_ASSERT(unblockNode->numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == 1);
- RF_ASSERT(termNode->numSuccedents == 0);
- unblockNode->succedents[0] = termNode;
- termNode->antecedents[0] = unblockNode;
- termNode->antType[0] = rf_control;
-}
-#define CONS_PDA(if,start,num) \
- pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \
- pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \
- pda_p->numSector = num; \
- pda_p->next = NULL; \
- RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList)
-#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0)
-/*
- * Build the physical disk addresses (PDAs) used by a write to a stripe whose
- * asmap carries one or two failed PDAs (asmap->failedPDAs[0..1]).
- *
- * Outputs, as set by the code below:
- *   *pqpdap   - array of 2 PDAs (P then Q), or of 4 PDAs (P, Q for the first
- *               failed unit, then P, Q for the second) in the "two slabs"
- *               case; each PDA gets its own buffer
- *   *nPQNodep - PDAPerDisk (1 or 2)
- *   *nNodep   - PDAPerDisk * (numDataCol - 2)
- *   *pdap     - array of *nNodep data PDAs chained through ->next; NOT
- *               written at all when *nNodep is 0
- *
- * Every allocation is made with RF_MallocAndAdd/RF_CallocAndAdd against
- * allocList.
- */
-void
-rf_WriteGenerateFailedAccessASMs(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_PhysDiskAddr_t ** pdap,
- int *nNodep,
- RF_PhysDiskAddr_t ** pqpdap,
- int *nPQNodep,
- RF_AllocListElem_t * allocList)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- int PDAPerDisk, i;
- RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
- int numDataCol = layoutPtr->numDataCol;
- /* state selects the layout built below: 1 = one failed unit, 2 = whole
-  * stripe unit, 3 = two separate slabs */
- int state;
- unsigned napdas;
- /* NOTE: fone_end and ftwo_end are assigned below but never read in this
-  * function */
- RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end;
- RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
- RF_PhysDiskAddr_t *pda_p;
- RF_RaidAddr_t sosAddr;
-
- /* determine how many pda's we will have to generate per unaccessed
- * stripe. If there is only one failed data unit, it is one; if two,
- * possibly two, depending on whether they overlap. */
-
- fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector);
- fone_end = fone_start + fone->numSector;
-
- if (asmap->numDataFailed == 1) {
- PDAPerDisk = 1;
- state = 1;
- RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- pda_p = *pqpdap;
- /* build p */
- CONS_PDA(parityInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- /* build q */
- CONS_PDA(qInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_Q;
- } else {
- ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector);
- ftwo_end = ftwo_start + ftwo->numSector;
- /* the two failed ranges together exceed one stripe unit: cover the
-  * whole stripe unit with a single P and a single Q PDA */
- if (fone->numSector + ftwo->numSector > secPerSU) {
- PDAPerDisk = 1;
- state = 2;
- RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- pda_p = *pqpdap;
- CONS_PDA(parityInfo, 0, secPerSU);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- CONS_PDA(qInfo, 0, secPerSU);
- pda_p->type = RF_PDA_TYPE_Q;
- } else {
- PDAPerDisk = 2;
- state = 3;
- /* four of them, fone, then ftwo */
- RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- pda_p = *pqpdap;
- CONS_PDA(parityInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- CONS_PDA(qInfo, fone_start, fone->numSector);
- pda_p->type = RF_PDA_TYPE_Q;
- pda_p++;
- CONS_PDA(parityInfo, ftwo_start, ftwo->numSector);
- pda_p->type = RF_PDA_TYPE_PARITY;
- pda_p++;
- CONS_PDA(qInfo, ftwo_start, ftwo->numSector);
- pda_p->type = RF_PDA_TYPE_Q;
- }
- }
- /* figure out number of nonaccessed pda */
- napdas = PDAPerDisk * (numDataCol - 2);
- *nPQNodep = PDAPerDisk;
-
- *nNodep = napdas;
- if (napdas == 0)
- return; /* short circuit */
-
- /* allocate up our list of pda's */
-
- RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
- *pdap = pda_p;
-
- /* linkem together */
- for (i = 0; i < (napdas - 1); i++)
- pda_p[i].next = pda_p + (i + 1);
-
- /* walk the data columns of the stripe, starting at its first address;
-  * pda_p is the fill cursor into the array just allocated */
- sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
- for (i = 0; i < numDataCol; i++) {
- /* every slot is already filled: ignore the remaining columns */
- if ((pda_p - (*pdap)) == napdas)
- continue;
- pda_p->type = RF_PDA_TYPE_DATA;
- pda_p->raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- /* skip over dead disks */
- /* (the cursor is not advanced, so this slot is reused for the next
-  * column) */
- if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
- continue;
- switch (state) {
- case 1: /* fone */
- pda_p->numSector = fone->numSector;
- pda_p->raidAddress += fone_start;
- pda_p->startSector += fone_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- break;
- case 2: /* full stripe */
- pda_p->numSector = secPerSU;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
- break;
- case 3: /* two slabs */
- pda_p->numSector = fone->numSector;
- pda_p->raidAddress += fone_start;
- pda_p->startSector += fone_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- /* second PDA for the same column, covering the ftwo range */
- pda_p++;
- pda_p->type = RF_PDA_TYPE_DATA;
- pda_p->raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
- pda_p->numSector = ftwo->numSector;
- pda_p->raidAddress += ftwo_start;
- pda_p->startSector += ftwo_start;
- RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
- break;
- default:
- RF_PANIC();
- }
- pda_p++;
- }
-
- /* the cursor must have consumed exactly napdas slots */
- RF_ASSERT(pda_p - *pdap == napdas);
- return;
-}
-#define DISK_NODE_PDA(node) ((node)->params[0].p)
-
-#define DISK_NODE_PARAMS(_node_,_p_) \
- (_node_).params[0].p = _p_ ; \
- (_node_).params[1].p = (_p_)->bufPtr; \
- (_node_).params[2].v = parityStripeID; \
- (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru)
-
-/*
- * rf_DoubleDegSmallWrite
- *
- * Build the small-write DAG for an access with exactly one failed data unit
- * (asserted below) on an array protected by two redundancy units (P and Q).
- * The graph is wired as follows:
- *
- *                  /-- Rrd --\                 /-- Wud --\
- *   Hdr -> Block --+-- Rp  --+--> Recovery --> +-- Wp  --+--> Unblock -> Trm
- *                  \-- Rq  --/                 \-- Wq  --/
- *
- *   Rrd = read of recovery data (possibly none)
- *   Rp/Rq = read of P / Q (one or two of each, see nPQNodes)
- *   Wud = write of user data (failed unit excluded)
- *   Wp/Wq = write of P / Q (one or two of each)
- *
- * Parameters:
- *   raidPtr, asmap          - array and stripe map of the access
- *   dag_h                   - DAG header to fill in
- *   bp, flags               - not used by this routine
- *   allocList               - all memory is allocated against this list
- *   redundantReadNodeName   - name given to the Q read node(s)
- *   redundantWriteNodeName  - name given to the Q write node(s)
- *   recoveryNodeName        - name given to the recovery node
- *   recovFunc               - do-function of the recovery node
- */
-void
-rf_DoubleDegSmallWrite(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList,
-	char *redundantReadNodeName,
-	char *redundantWriteNodeName,
-	char *recoveryNodeName,
-	int (*recovFunc) (RF_DagNode_t *))
-{
-	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-	RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode,
-	       *unblockNode, *rpNodes, *rqNodes, *wpNodes, *wqNodes, *termNode;
-	RF_PhysDiskAddr_t *pda, *pqPDAs;
-	/*
-	 * rf_WriteGenerateFailedAccessASMs() does not store through its pdap
-	 * argument when it generates zero PDAs, so start from a defined value.
-	 */
-	RF_PhysDiskAddr_t *npdas = NULL;
-	int     nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i;
-	RF_ReconUnitNum_t which_ru;
-	int     nPQNodes;
-	RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);
-
-	/*
-	 * Simple small write case: the first part looks like a
-	 * reconstruct-read of the failed data units, followed by a write of
-	 * all data units that are not failed.
-	 */
-	rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList);
-
-	RF_ASSERT(asmap->numDataFailed == 1);
-
-	nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
-	nReadNodes = nRrdNodes + 2 * nPQNodes;
-	nWriteNodes = nWudNodes + 2 * nPQNodes;
-	nNodes = 4 + nReadNodes + nWriteNodes;
-
-	/*
-	 * One allocation holds every node.  The read nodes (rrd, rp, rq) are
-	 * contiguous starting at rrdNodes and the write nodes (wud, wp, wq)
-	 * are contiguous starting at wudNodes; the loops below that index
-	 * "rrdNodes + i" / "wudNodes + i" past the first group rely on this.
-	 */
-	RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
-	blockNode = nodes;
-	unblockNode = blockNode + 1;
-	termNode = unblockNode + 1;
-	recoveryNode = termNode + 1;
-	rrdNodes = recoveryNode + 1;
-	rpNodes = rrdNodes + nRrdNodes;
-	rqNodes = rpNodes + nPQNodes;
-	wudNodes = rqNodes + nPQNodes;
-	wpNodes = wudNodes + nWudNodes;
-	wqNodes = wpNodes + nPQNodes;
-
-	dag_h->creator = "PQ_DDSimpleSmallWrite";
-	dag_h->numSuccedents = 1;
-	dag_h->succedents[0] = blockNode;
-	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-	termNode->antecedents[0] = unblockNode;
-	termNode->antType[0] = rf_control;
-
-	/* init the block and unblock nodes */
-	/* The block node has all the read nodes as successors */
-	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
-	for (i = 0; i < nReadNodes; i++)
-		blockNode->succedents[i] = rrdNodes + i;
-
-	/* The unblock node has all the writes as antecedents */
-	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList);
-	for (i = 0; i < nWriteNodes; i++) {
-		unblockNode->antecedents[i] = wudNodes + i;
-		unblockNode->antType[i] = rf_control;
-	}
-	unblockNode->succedents[0] = termNode;
-
-#define INIT_READ_NODE(node,name) \
-	rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
-	(node)->succedents[0] = recoveryNode; \
-	(node)->antecedents[0] = blockNode; \
-	(node)->antType[0] = rf_control;
-
-	/* build the read nodes */
-	pda = npdas;
-	for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
-		INIT_READ_NODE(rrdNodes + i, "rrd");
-		DISK_NODE_PARAMS(rrdNodes[i], pda);
-	}
-
-	/* read redundancy pdas: pqPDAs is laid out P, Q [, P, Q] */
-	pda = pqPDAs;
-	INIT_READ_NODE(rpNodes, "Rp");
-	RF_ASSERT(pda);
-	DISK_NODE_PARAMS(rpNodes[0], pda);
-	pda++;
-	INIT_READ_NODE(rqNodes, redundantReadNodeName);
-	RF_ASSERT(pda);
-	DISK_NODE_PARAMS(rqNodes[0], pda);
-	if (nPQNodes == 2) {
-		pda++;
-		INIT_READ_NODE(rpNodes + 1, "Rp");
-		RF_ASSERT(pda);
-		DISK_NODE_PARAMS(rpNodes[1], pda);
-		pda++;
-		INIT_READ_NODE(rqNodes + 1, redundantReadNodeName);
-		RF_ASSERT(pda);
-		DISK_NODE_PARAMS(rqNodes[1], pda);
-	}
-
-	/*
-	 * The recovery node has all reads as predecessors and all writes as
-	 * successors.  It generates a result for every write P or write Q
-	 * node.  As parameters it takes a pda per read and a pda per stripe
-	 * unit of user data written, followed by the failed pda, the raidPtr
-	 * and the asm.  For results, it takes the PDAs for P & Q.
-	 */
-	rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
-	    nWriteNodes,		/* successors */
-	    nReadNodes,			/* preds */
-	    nReadNodes + nWudNodes + 3,	/* params */
-	    2 * nPQNodes,		/* results */
-	    dag_h, recoveryNodeName, allocList);
-
-	for (i = 0; i < nReadNodes; i++) {
-		recoveryNode->antecedents[i] = rrdNodes + i;
-		recoveryNode->antType[i] = rf_control;
-		recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes + i);
-	}
-	for (i = 0; i < nWudNodes; i++) {
-		recoveryNode->succedents[i] = wudNodes + i;
-	}
-	recoveryNode->params[nReadNodes + nWudNodes].p = asmap->failedPDAs[0];
-	recoveryNode->params[nReadNodes + nWudNodes + 1].p = raidPtr;
-	recoveryNode->params[nReadNodes + nWudNodes + 2].p = asmap;
-
-	/* i continues from nWudNodes: the remaining successors are Wp/Wq */
-	for (; i < nWriteNodes; i++)
-		recoveryNode->succedents[i] = wudNodes + i;
-
-	pda = pqPDAs;
-	recoveryNode->results[0] = pda;
-	pda++;
-	recoveryNode->results[1] = pda;
-	if (nPQNodes == 2) {
-		pda++;
-		recoveryNode->results[2] = pda;
-		pda++;
-		recoveryNode->results[3] = pda;
-	}
-
-	/* fill writes */
-#define INIT_WRITE_NODE(node,name) \
-	rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
-	(node)->succedents[0] = unblockNode; \
-	(node)->antecedents[0] = recoveryNode; \
-	(node)->antType[0] = rf_control;
-
-	pda = asmap->physInfo;
-	for (i = 0; i < nWudNodes; i++) {
-		INIT_WRITE_NODE(wudNodes + i, "Wd");
-		DISK_NODE_PARAMS(wudNodes[i], pda);
-		/* the recovery node also gets the pda of each user-data write */
-		recoveryNode->params[nReadNodes + i].p = DISK_NODE_PDA(wudNodes + i);
-		pda = pda->next;
-	}
-
-	/*
-	 * Write redundancy pdas.  The Q write nodes take the caller-supplied
-	 * name, mirroring what the Q read nodes do with
-	 * redundantReadNodeName.
-	 */
-	pda = pqPDAs;
-	INIT_WRITE_NODE(wpNodes, "Wp");
-	RF_ASSERT(pda);
-	DISK_NODE_PARAMS(wpNodes[0], pda);
-	pda++;
-	INIT_WRITE_NODE(wqNodes, redundantWriteNodeName);
-	RF_ASSERT(pda);
-	DISK_NODE_PARAMS(wqNodes[0], pda);
-	if (nPQNodes == 2) {
-		pda++;
-		INIT_WRITE_NODE(wpNodes + 1, "Wp");
-		RF_ASSERT(pda);
-		DISK_NODE_PARAMS(wpNodes[1], pda);
-		pda++;
-		INIT_WRITE_NODE(wqNodes + 1, redundantWriteNodeName);
-		RF_ASSERT(pda);
-		DISK_NODE_PARAMS(wqNodes[1], pda);
-	}
-}
-#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0) */
diff --git a/sys/dev/raidframe/rf_dagdegwr.h b/sys/dev/raidframe/rf_dagdegwr.h
deleted file mode 100644
index 1e4b5e2..0000000
--- a/sys/dev/raidframe/rf_dagdegwr.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dagdegwr.h,v 1.4 1999/08/15 02:36:03 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-
-#ifndef _RF__RF_DAGDEGWR_H_
-#define _RF__RF_DAGDEGWR_H_
-
-/* degraded write DAG creation routines */
-void rf_CreateDegradedWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList);
-
-void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- int nfaults, int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle);
-
-/*
- * Generates the P/Q PDAs (*pqpdap, count per kind in *nPQNodep) and the
- * data PDAs (*pdap, count in *nNodep) for an access with failed data units.
- */
-void rf_WriteGenerateFailedAccessASMs(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap,
- int *nNodep, RF_PhysDiskAddr_t ** pqpdap,
- int *nPQNodep, RF_AllocListElem_t * allocList);
-
-/*
- * Small-write DAG for a stripe with one failed data unit; recovFunc is
- * installed as the do-function of the recovery node.
- */
-void rf_DoubleDegSmallWrite(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList, char *redundantReadNodeName,
- char *redundantWriteNodeName, char *recoveryNodeName,
- int (*recovFunc) (RF_DagNode_t *));
-
-#endif /* !_RF__RF_DAGDEGWR_H_ */
diff --git a/sys/dev/raidframe/rf_dagffrd.c b/sys/dev/raidframe/rf_dagffrd.c
deleted file mode 100644
index 13c0af7..0000000
--- a/sys/dev/raidframe/rf_dagffrd.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/* $NetBSD: rf_dagffrd.c,v 1.4 2000/01/07 03:40:58 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_dagffrd.c
- *
- * code for creating fault-free read DAGs
- *
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_memchunk.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_dagffrd.h>
-
-/******************************************************************************
- *
- * General comments on DAG creation:
- *
- * All DAGs in this file use roll-away error recovery. Each DAG has a single
- * commit node, usually called "Cmt." If an error occurs before the Cmt node
- * is reached, the execution engine will halt forward execution and work
- * backward through the graph, executing the undo functions. Assuming that
- * each node in the graph prior to the Cmt node is undoable and atomic - or -
- * does not make changes to permanent state, the graph will fail atomically.
- * If an error occurs after the Cmt node executes, the engine will roll-forward
- * through the graph, blindly executing nodes until it reaches the end.
- * If a graph reaches the end, it is assumed to have completed successfully.
- *
- * A graph has only 1 Cmt node.
- *
- */
-
-
-/******************************************************************************
- *
- * The following wrappers map the standard DAG creation interface to the
- * DAG creation routines. Additionally, these wrappers enable experimentation
- * with new DAG structures by providing an extra level of indirection, allowing
- * the DAG creation routines to be replaced at this single point.
- */
-
-/*
- * Fault-free read entry point: forwards every argument unchanged to
- * rf_CreateNonredundantDAG() with the I/O type fixed to RF_IO_TYPE_READ.
- */
-void
-rf_CreateFaultFreeReadDAG(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList)
-{
-	const RF_IoType_t ioType = RF_IO_TYPE_READ;
-
-	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, ioType);
-}
-
-
-/******************************************************************************
- *
- * DAG creation code begins here
- */
-
-/******************************************************************************
- *
- * creates a DAG to perform a nonredundant read or write of data within one
- * stripe.
- * For reads, this DAG is as follows:
- *
- * /---- read ----\
- * Header -- Block ---- read ---- Commit -- Terminate
- * \---- read ----/
- *
- * For writes, this DAG is as follows:
- *
- *                              /---- write ----\
- *    Header -- Block -- Commit ---- write ---- Terminate
- *                              \---- write ----/
- *
- * There is one disk node per stripe unit accessed, and all disk nodes are in
- * parallel.
- *
- * Tricky point here: The first disk node (read or write) is created
- * normally. Subsequent disk nodes are created by copying the first one,
- * and modifying a few params. The "succedents" and "antecedents" fields are
- * _not_ re-created in each node, but rather left pointing to the same array
- * that was malloc'd when the first node was created. Thus, it's essential
- * that when this DAG is freed, the succedents and antecedents fields be freed
- * in ONLY ONE of the read nodes. This does not apply to the "params" field
- * because it is recreated for each READ node.
- *
- * Note that normal-priority accesses do not need to be tagged with their
- * parity stripe ID, because they will never be promoted. Hence, I've
- * commented-out the code to do this, and marked it with UNNEEDED.
- *
- *****************************************************************************/
-
-/*
- * Build a nonredundant DAG with one disk node per accessed stripe unit.
- * `type' selects whether those nodes read or write; any other value panics.
- * Reads are wired Hdr -> Nil -> reads -> Cmt -> Trm, writes are wired
- * Hdr -> Nil -> Cmt -> writes -> Trm.  bp and flags are not used here.
- */
-void
-rf_CreateNonredundantDAG(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList,
-	RF_IoType_t type)
-{
-	RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode;
-	RF_DagNode_t *dn;
-	RF_PhysDiskAddr_t *pda;
-	int     (*doFunc) (RF_DagNode_t *);
-	int     (*undoFunc) (RF_DagNode_t *);
-	int     unitCount, totalNumNodes, k;
-	char   *name;
-
-	pda = asmap->physInfo;
-	unitCount = asmap->numStripeUnitsAccessed;
-	dag_h->creator = "NonredundantDAG";
-
-	/* pick the disk-node functions and label for the requested direction */
-	RF_ASSERT(RF_IO_IS_R_OR_W(type));
-	if (type == RF_IO_TYPE_READ) {
-		doFunc = rf_DiskReadFunc;
-		undoFunc = rf_DiskReadUndoFunc;
-		name = "R ";
-		if (rf_dagDebug)
-			printf("[Creating non-redundant read DAG]\n");
-	} else if (type == RF_IO_TYPE_WRITE) {
-		doFunc = rf_DiskWriteFunc;
-		undoFunc = rf_DiskWriteUndoFunc;
-		name = "W ";
-		if (rf_dagDebug)
-			printf("[Creating non-redundant write DAG]\n");
-	} else {
-		RF_PANIC();
-	}
-
-	/*
-	 * One commit node, not yet committed.  In the read graph the commit
-	 * node comes after the disk nodes; in the write graph it comes
-	 * before them.
-	 */
-	dag_h->numCommitNodes = 1;
-	dag_h->numCommits = 0;
-	dag_h->numSuccedents = 1;
-
-	/*
-	 * Single allocation, laid out as:
-	 *   [0 .. unitCount-1]  disk nodes
-	 *   [unitCount]         block node
-	 *   [unitCount + 1]     commit node
-	 *   [unitCount + 2]     terminator node
-	 */
-	RF_ASSERT(unitCount > 0);
-	totalNumNodes = unitCount + 3;
-	RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
-	    (RF_DagNode_t *), allocList);
-	diskNodes = nodes;
-	blockNode = diskNodes + unitCount;
-	commitNode = blockNode + 1;
-	termNode = commitNode + 1;
-	RF_ASSERT(termNode + 1 == nodes + totalNumNodes);
-
-	/* initialize the three control nodes; fan-in/fan-out depends on type */
-	if (type == RF_IO_TYPE_READ) {
-		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
-		    NULL, unitCount, 0, 0, 0, dag_h, "Nil", allocList);
-		rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
-		    NULL, 1, unitCount, 0, 0, dag_h, "Cmt", allocList);
-		rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
-		    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-	} else if (type == RF_IO_TYPE_WRITE) {
-		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
-		    NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
-		rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
-		    NULL, unitCount, 1, 0, 0, dag_h, "Cmt", allocList);
-		rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
-		    NULL, 0, unitCount, 0, 0, dag_h, "Trm", allocList);
-	} else {
-		RF_PANIC();
-	}
-
-	/* one disk node per PDA on the asmap's physInfo list */
-	for (k = 0; k < unitCount; k++, pda = pda->next) {
-		RF_ASSERT(pda != NULL);
-		dn = diskNodes + k;
-		rf_InitNode(dn, rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc,
-		    1, 1, 4, 0, dag_h, name, allocList);
-		dn->params[0].p = pda;
-		dn->params[1].p = pda->bufPtr;
-		/* parity stripe id is not necessary */
-		dn->params[2].v = 0;
-		dn->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
-	}
-
-	/*
-	 * Wire the graph.
-	 */
-
-	/* header -> block node */
-	RF_ASSERT(blockNode->numAntecedents == 0);
-	dag_h->succedents[0] = blockNode;
-
-	if (type == RF_IO_TYPE_READ) {
-		/* read graph: block -> every read -> commit -> term */
-		RF_ASSERT(blockNode->numSuccedents == unitCount);
-		RF_ASSERT(commitNode->numAntecedents == unitCount);
-		for (k = 0; k < unitCount; k++) {
-			dn = diskNodes + k;
-
-			/* block node -> read node */
-			RF_ASSERT(dn->numAntecedents == 1);
-			blockNode->succedents[k] = dn;
-			dn->antecedents[0] = blockNode;
-			dn->antType[0] = rf_control;
-
-			/* read node -> commit node */
-			RF_ASSERT(dn->numSuccedents == 1);
-			dn->succedents[0] = commitNode;
-			commitNode->antecedents[k] = dn;
-			commitNode->antType[k] = rf_control;
-		}
-
-		/* commit node -> term node */
-		RF_ASSERT(commitNode->numSuccedents == 1);
-		RF_ASSERT(termNode->numAntecedents == 1);
-		RF_ASSERT(termNode->numSuccedents == 0);
-		commitNode->succedents[0] = termNode;
-		termNode->antecedents[0] = commitNode;
-		termNode->antType[0] = rf_control;
-	} else {
-		/* write graph: block -> commit -> every write -> term */
-		RF_ASSERT(blockNode->numSuccedents == 1);
-		RF_ASSERT(commitNode->numAntecedents == 1);
-		blockNode->succedents[0] = commitNode;
-		commitNode->antecedents[0] = blockNode;
-		commitNode->antType[0] = rf_control;
-
-		RF_ASSERT(commitNode->numSuccedents == unitCount);
-		RF_ASSERT(termNode->numAntecedents == unitCount);
-		RF_ASSERT(termNode->numSuccedents == 0);
-		for (k = 0; k < unitCount; k++) {
-			dn = diskNodes + k;
-
-			/* commit node -> write node */
-			RF_ASSERT(dn->numAntecedents == 1);
-			commitNode->succedents[k] = dn;
-			dn->antecedents[0] = commitNode;
-			dn->antType[0] = rf_control;
-
-			/* write node -> term node */
-			RF_ASSERT(dn->numSuccedents == 1);
-			dn->succedents[0] = termNode;
-			termNode->antecedents[k] = dn;
-			termNode->antType[k] = rf_control;
-		}
-	}
-}
-/******************************************************************************
- * Create a fault-free read DAG for RAID level 1
- *
- * Hdr -> Nil -> Rmir -> Cmt -> Trm
- *
- * The "Rmir" node schedules a read from the disk in the mirror pair with the
- * shortest disk queue. the proper queue is selected at Rmir execution. this
- * deferred mapping is unlike other archs in RAIDframe which generally fix
- * mapping at DAG creation time.
- *
- * Parameters: raidPtr - description of the physical array
- * asmap - logical & physical addresses for this access
- * bp - buffer ptr (for holding read data)
- * flags - general flags (e.g. disk locking)
- * allocList - list of memory allocated in DAG creation
- *****************************************************************************/
-
-/*
- * Shared builder for the RAID level 1 read DAGs
- * (Hdr -> Nil -> Rmir ... Rmir -> Cmt -> Trm).  One "Rmir" node is created
- * per accessed stripe unit; it is given both the data PDA and the matching
- * PDA from asmap->parityInfo, and `readfunc' is installed as its
- * do-function.  bp and flags are not used here.
- */
-static void
-CreateMirrorReadDAG(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList,
-	int (*readfunc) (RF_DagNode_t * node))
-{
-	RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode;
-	RF_PhysDiskAddr_t *data_pda = asmap->physInfo;
-	RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo;
-	int     i, n, totalNumNodes;
-
-	n = asmap->numStripeUnitsAccessed;
-	dag_h->creator = "RaidOneReadDAG";
-	if (rf_dagDebug) {
-		printf("[Creating RAID level 1 read DAG]\n");
-	}
-	/*
-	 * This dag can not commit until the commit node is reached
-	 * errors prior to the commit point imply the dag has failed.
-	 */
-	dag_h->numCommitNodes = 1;
-	dag_h->numCommits = 0;
-	dag_h->numSuccedents = 1;
-
-	/*
-	 * Node count:
-	 *      n data reads
-	 *      1 block node
-	 *      1 commit node
-	 *      1 terminator node
-	 */
-	RF_ASSERT(n > 0);
-	totalNumNodes = n + 3;
-	RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
-	    (RF_DagNode_t *), allocList);
-	i = 0;
-	readNodes = &nodes[i];
-	i += n;
-	blockNode = &nodes[i];
-	i += 1;
-	commitNode = &nodes[i];
-	i += 1;
-	termNode = &nodes[i];
-	i += 1;
-	RF_ASSERT(i == totalNumNodes);
-
-	/* initialize nodes */
-	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
-	    rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
-	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
-	    rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
-	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
-	    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-
-	for (i = 0; i < n; i++) {
-		RF_ASSERT(data_pda != NULL);
-		RF_ASSERT(parity_pda != NULL);
-		rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc,
-		    rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h,
-		    "Rmir", allocList);
-		readNodes[i].params[0].p = data_pda;
-		readNodes[i].params[1].p = data_pda->bufPtr;
-		/*
-		 * parity stripe id is not necessary; the slot holds a value,
-		 * not a pointer, so clear it through .v as the other disk
-		 * nodes in this file do.
-		 */
-		readNodes[i].params[2].v = 0;
-		readNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
-		/* fifth parameter: the mirror copy's PDA */
-		readNodes[i].params[4].p = parity_pda;
-		data_pda = data_pda->next;
-		parity_pda = parity_pda->next;
-	}
-
-	/*
-	 * Connect nodes
-	 */
-
-	/* connect hdr to block node */
-	RF_ASSERT(blockNode->numAntecedents == 0);
-	dag_h->succedents[0] = blockNode;
-
-	/* connect block node to read nodes */
-	RF_ASSERT(blockNode->numSuccedents == n);
-	for (i = 0; i < n; i++) {
-		RF_ASSERT(readNodes[i].numAntecedents == 1);
-		blockNode->succedents[i] = &readNodes[i];
-		readNodes[i].antecedents[0] = blockNode;
-		readNodes[i].antType[0] = rf_control;
-	}
-
-	/* connect read nodes to commit node */
-	RF_ASSERT(commitNode->numAntecedents == n);
-	for (i = 0; i < n; i++) {
-		RF_ASSERT(readNodes[i].numSuccedents == 1);
-		readNodes[i].succedents[0] = commitNode;
-		commitNode->antecedents[i] = &readNodes[i];
-		commitNode->antType[i] = rf_control;
-	}
-
-	/* connect commit node to term node */
-	RF_ASSERT(commitNode->numSuccedents == 1);
-	RF_ASSERT(termNode->numAntecedents == 1);
-	RF_ASSERT(termNode->numSuccedents == 0);
-	commitNode->succedents[0] = termNode;
-	termNode->antecedents[0] = commitNode;
-	termNode->antType[0] = rf_control;
-}
-
-/*
- * Mirror read DAG whose read nodes execute rf_DiskReadMirrorIdleFunc; all
- * other arguments are passed straight through to CreateMirrorReadDAG().
- */
-void
-rf_CreateMirrorIdleReadDAG(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList)
-{
-	int     (*mirrorRead) (RF_DagNode_t *) = rf_DiskReadMirrorIdleFunc;
-
-	CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, mirrorRead);
-}
-
-/*
- * Mirror read DAG whose read nodes execute rf_DiskReadMirrorPartitionFunc;
- * all other arguments are passed straight through to CreateMirrorReadDAG().
- */
-void
-rf_CreateMirrorPartitionReadDAG(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList)
-{
-	int     (*mirrorRead) (RF_DagNode_t *) = rf_DiskReadMirrorPartitionFunc;
-
-	CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, mirrorRead);
-}
diff --git a/sys/dev/raidframe/rf_dagffrd.h b/sys/dev/raidframe/rf_dagffrd.h
deleted file mode 100644
index 6862a8d..0000000
--- a/sys/dev/raidframe/rf_dagffrd.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dagffrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_DAGFFRD_H_
-#define _RF__RF_DAGFFRD_H_
-
-#include <dev/raidframe/rf_types.h>
-
-/* fault-free read DAG creation routines */
-/* Nonredundant read: rf_CreateNonredundantDAG() with RF_IO_TYPE_READ. */
-void
-rf_CreateFaultFreeReadDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList);
-/* One disk node per accessed stripe unit; `type' selects read or write. */
-void
-rf_CreateNonredundantDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList, RF_IoType_t type);
-/* RAID 1 read DAG whose read nodes run rf_DiskReadMirrorIdleFunc. */
-void
-rf_CreateMirrorIdleReadDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList);
-/* RAID 1 read DAG whose read nodes run rf_DiskReadMirrorPartitionFunc. */
-void
-rf_CreateMirrorPartitionReadDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList);
-
-#endif /* !_RF__RF_DAGFFRD_H_ */
diff --git a/sys/dev/raidframe/rf_dagffwr.c b/sys/dev/raidframe/rf_dagffwr.c
deleted file mode 100644
index 9216b29..0000000
--- a/sys/dev/raidframe/rf_dagffwr.c
+++ /dev/null
@@ -1,2131 +0,0 @@
-/* $NetBSD: rf_dagffwr.c,v 1.5 2000/01/07 03:40:58 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_dagffwr.c
- *
- * code for creating fault-free DAGs
- *
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_memchunk.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_dagffwr.h>
-
-/******************************************************************************
- *
- * General comments on DAG creation:
- *
- * All DAGs in this file use roll-away error recovery. Each DAG has a single
- * commit node, usually called "Cmt." If an error occurs before the Cmt node
- * is reached, the execution engine will halt forward execution and work
- * backward through the graph, executing the undo functions. Assuming that
- * each node in the graph prior to the Cmt node is undoable and atomic - or -
- * does not make changes to permanent state, the graph will fail atomically.
- * If an error occurs after the Cmt node executes, the engine will roll-forward
- * through the graph, blindly executing nodes until it reaches the end.
- * If a graph reaches the end, it is assumed to have completed successfully.
- *
- * A graph has only 1 Cmt node.
- *
- */
-
-
-/******************************************************************************
- *
- * The following wrappers map the standard DAG creation interface to the
- * DAG creation routines. Additionally, these wrappers enable experimentation
- * with new DAG structures by providing an extra level of indirection, allowing
- * the DAG creation routines to be replaced at this single point.
- */
-
-
-/*
- * Standard-interface wrapper for a non-redundant write.  Forwards every
- * argument to rf_CreateNonredundantDAG() except the I/O type, which is
- * always RF_IO_TYPE_WRITE; the caller-supplied `type' is not consulted.
- */
-void
-rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
-    RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
-    RF_AllocListElem_t *allocList, RF_IoType_t type)
-{
-	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags,
-	    allocList, RF_IO_TYPE_WRITE);
-}
-
-/*
- * Standard-interface wrapper for a RAID 0 write.  Identical in effect to
- * the non-redundant write wrapper: it hands the access to
- * rf_CreateNonredundantDAG() with the I/O type fixed at RF_IO_TYPE_WRITE
- * and does not look at the `type' argument.
- */
-void
-rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
-    RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
-    RF_AllocListElem_t *allocList, RF_IoType_t type)
-{
-	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags,
-	    allocList, RF_IO_TYPE_WRITE);
-}
-
-/*
- * Standard-interface wrapper for a small write using "normal" rollaway.
- * Calls rf_CommonCreateSmallWriteDAG() with rf_xorFuncs as the parity
- * function table and a NULL Q function table (NULL qfuncs selects the
- * single-fault-tolerant form of the graph).
- */
-void
-rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
-    RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
-    RF_AllocListElem_t *allocList)
-{
-	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
-	    allocList, &rf_xorFuncs, NULL);
-}
-
-/*
- * Standard-interface wrapper for a large write using "normal" rollaway.
- * Calls rf_CommonCreateLargeWriteDAG() for one redundancy unit, with
- * rf_RegularXorFunc as the redundancy function and buffer recycling
- * enabled.
- */
-void
-rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
-    RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
-    RF_AllocListElem_t *allocList)
-{
-	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
-	    allocList,
-	    1,			/* nfaults */
-	    rf_RegularXorFunc,	/* redFunc */
-	    RF_TRUE);		/* allowBufferRecycle */
-}
-
-
-/******************************************************************************
- *
- * DAG creation code begins here
- */
-
-
-/******************************************************************************
- *
- * creates a DAG to perform a large-write operation:
- *
- *           / Rod \           / Wnd \
- * H -- block- Rod - Xor - Cmt - Wnd --- T
- *           \ Rod /           \ Wnp /
- *                             \[Wnq]/
- *
- * When the access leaves no part of the stripe unaccessed there are no Rod
- * nodes and the block node is connected directly to the Xor node.
- *
- * The XOR node also does the Q calculation in the P+Q architecture.
- * All nodes before the commit node (Cmt) are assumed to be atomic and
- * undoable - or - they make no changes to permanent state.
- *
- * Rod = read old data
- * Cmt = commit node
- * Wnp = write new parity
- * Wnd = write new data
- * Wnq = write new "q"
- * [] denotes optional segments in the graph
- *
- * Parameters:  raidPtr   - description of the physical array
- *              asmap     - logical & physical addresses for this access
- *              dag_h     - DAG header to fill in (creator, commit counts,
- *                          first succedent)
- *              bp        - buffer ptr (holds write data); not referenced
- *                          directly by this routine
- *              flags     - general flags (e.g. disk locking); not referenced
- *                          directly by this routine
- *              allocList - list of memory allocated in DAG creation
- *              nfaults   - number of faults array can tolerate
- *                          (equal to # redundancy units in stripe);
- *                          must be 1 (P only) or 2 (P and Q)
- *              redFunc   - redundancy generating function executed by the
- *                          Xor node
- *              allowBufferRecycle - when non-zero, the buffer of a Rod node
- *                          that reads a complete stripe unit may be reused
- *                          as the parity buffer instead of allocating one
- *
- *****************************************************************************/
-
-void
-rf_CommonCreateLargeWriteDAG(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList,
-	int nfaults,
-	int (*redFunc) (RF_DagNode_t *),
-	int allowBufferRecycle)
-{
-	RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode;
-	RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
-	int nWndNodes, nRodNodes, i, nodeNum, asmNum;
-	RF_AccessStripeMapHeader_t *new_asm_h[2];
-	RF_StripeNum_t parityStripeID;
-	char *sosBuffer, *eosBuffer;
-	RF_ReconUnitNum_t which_ru;
-	RF_RaidLayout_t *layoutPtr;
-	RF_PhysDiskAddr_t *pda;
-
-	/*
-	 * The node array below holds Wnd + Xor + block + Cmt + Trm plus one
-	 * write node per redundancy unit, and only Wnp/Wnq are ever wired to
-	 * the commit and terminate nodes, so any other value of nfaults would
-	 * either overrun the array or leave edges unconnected.
-	 */
-	RF_ASSERT(nfaults == 1 || nfaults == 2);
-
-	layoutPtr = &(raidPtr->Layout);
-	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
-	    asmap->raidAddress, &which_ru);
-
-	if (rf_dagDebug) {
-		printf("[Creating large-write DAG]\n");
-	}
-	dag_h->creator = "LargeWriteDAG";
-
-	dag_h->numCommitNodes = 1;
-	dag_h->numCommits = 0;
-	dag_h->numSuccedents = 1;
-
-	/* alloc the nodes: Wnd, xor, commit, block, term, and Wnp [Wnq] */
-	nWndNodes = asmap->numStripeUnitsAccessed;
-	RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t),
-	    (RF_DagNode_t *), allocList);
-	i = 0;
-	wndNodes = &nodes[i];
-	i += nWndNodes;
-	xorNode = &nodes[i];
-	i += 1;
-	wnpNode = &nodes[i];
-	i += 1;
-	blockNode = &nodes[i];
-	i += 1;
-	commitNode = &nodes[i];
-	i += 1;
-	termNode = &nodes[i];
-	i += 1;
-	if (nfaults == 2) {
-		wnqNode = &nodes[i];
-		i += 1;
-	} else {
-		wnqNode = NULL;
-	}
-
-	/*
-	 * Map the part of the stripe this access does not touch; this yields
-	 * the number of Rod nodes needed and up to two stripe maps
-	 * (new_asm_h[0..1]) describing what they must read.
-	 */
-	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
-	    new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList);
-	if (nRodNodes > 0) {
-		RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t),
-		    (RF_DagNode_t *), allocList);
-	} else {
-		rodNodes = NULL;
-	}
-
-	/* begin node initialization */
-
-	/* block node fans out to every Rod, or straight to Xor if none */
-	if (nRodNodes > 0) {
-		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
-		    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h,
-		    "Nil", allocList);
-	} else {
-		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
-		    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h,
-		    "Nil", allocList);
-	}
-
-	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
-	    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0, dag_h,
-	    "Cmt", allocList);
-	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
-	    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, dag_h,
-	    "Trm", allocList);
-
-	/* initialize the Rod nodes */
-	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
-		if (new_asm_h[asmNum]) {
-			pda = new_asm_h[asmNum]->stripeMap->physInfo;
-			while (pda) {
-				rf_InitNode(&rodNodes[nodeNum], rf_wait,
-				    RF_FALSE, rf_DiskReadFunc,
-				    rf_DiskReadUndoFunc, rf_GenericWakeupFunc,
-				    1, 1, 4, 0, dag_h, "Rod", allocList);
-				rodNodes[nodeNum].params[0].p = pda;
-				rodNodes[nodeNum].params[1].p = pda->bufPtr;
-				rodNodes[nodeNum].params[2].v = parityStripeID;
-				rodNodes[nodeNum].params[3].v =
-				    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
-				    0, 0, which_ru);
-				nodeNum++;
-				pda = pda->next;
-			}
-		}
-	}
-	RF_ASSERT(nodeNum == nRodNodes);
-
-	/* initialize the wnd nodes */
-	pda = asmap->physInfo;
-	for (i = 0; i < nWndNodes; i++) {
-		rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc,
-		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
-		    dag_h, "Wnd", allocList);
-		RF_ASSERT(pda != NULL);
-		wndNodes[i].params[0].p = pda;
-		wndNodes[i].params[1].p = pda->bufPtr;
-		wndNodes[i].params[2].v = parityStripeID;
-		wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
-		    0, 0, which_ru);
-		pda = pda->next;
-	}
-
-	/*
-	 * initialize the redundancy node; its params are a {pda, buf} pair
-	 * for every Wnd and Rod node followed by raidPtr
-	 */
-	if (nRodNodes > 0) {
-		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
-		    rf_NullNodeUndoFunc, NULL, 1, nRodNodes,
-		    2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h,
-		    "Xr ", allocList);
-	} else {
-		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
-		    rf_NullNodeUndoFunc, NULL, 1, 1,
-		    2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h,
-		    "Xr ", allocList);
-	}
-	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
-	for (i = 0; i < nWndNodes; i++) {
-		xorNode->params[2 * i + 0] = wndNodes[i].params[0];	/* pda */
-		xorNode->params[2 * i + 1] = wndNodes[i].params[1];	/* buf ptr */
-	}
-	for (i = 0; i < nRodNodes; i++) {
-		xorNode->params[2 * (nWndNodes + i) + 0] =
-		    rodNodes[i].params[0];	/* pda */
-		xorNode->params[2 * (nWndNodes + i) + 1] =
-		    rodNodes[i].params[1];	/* buf ptr */
-	}
-	/* xor node needs to get at RAID information */
-	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
-
-	/*
-	 * Look for an Rod node that reads a complete SU. If none, alloc a buffer
-	 * to receive the parity info. Note that we can't use a new data buffer
-	 * because it will not have gotten written when the xor occurs.
-	 */
-	if (allowBufferRecycle) {
-		for (i = 0; i < nRodNodes; i++) {
-			if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector ==
-			    raidPtr->Layout.sectorsPerStripeUnit)
-				break;
-		}
-	}
-	/* `i' is only examined when the search above actually ran */
-	if ((!allowBufferRecycle) || (i == nRodNodes)) {
-		RF_CallocAndAdd(xorNode->results[0], 1,
-		    rf_RaidAddressToByte(raidPtr,
-		    raidPtr->Layout.sectorsPerStripeUnit),
-		    (void *), allocList);
-	} else {
-		xorNode->results[0] = rodNodes[i].params[1].p;
-	}
-
-	/* initialize the Wnp node */
-	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
-	    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
-	    "Wnp", allocList);
-	wnpNode->params[0].p = asmap->parityInfo;
-	wnpNode->params[1].p = xorNode->results[0];
-	wnpNode->params[2].v = parityStripeID;
-	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
-	    0, 0, which_ru);
-	/* parityInfo must describe entire parity unit */
-	RF_ASSERT(asmap->parityInfo->next == NULL);
-
-	if (nfaults == 2) {
-		/*
-		 * We never try to recycle a buffer for the Q calculation
-		 * in addition to the parity. This would cause two buffers
-		 * to get smashed during the P and Q calculation, guaranteeing
-		 * one would be wrong.
-		 */
-		RF_CallocAndAdd(xorNode->results[1], 1,
-		    rf_RaidAddressToByte(raidPtr,
-		    raidPtr->Layout.sectorsPerStripeUnit),
-		    (void *), allocList);
-		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
-		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
-		    dag_h, "Wnq", allocList);
-		wnqNode->params[0].p = asmap->qInfo;
-		wnqNode->params[1].p = xorNode->results[1];
-		wnqNode->params[2].v = parityStripeID;
-		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
-		    0, 0, which_ru);
-		/*
-		 * parityInfo must describe entire parity unit
-		 * NOTE(review): this re-checks parityInfo rather than qInfo;
-		 * presumably qInfo was intended - confirm before changing.
-		 */
-		RF_ASSERT(asmap->parityInfo->next == NULL);
-	}
-	/*
-	 * Connect nodes to form graph.
-	 */
-
-	/* connect dag header to block node */
-	RF_ASSERT(blockNode->numAntecedents == 0);
-	dag_h->succedents[0] = blockNode;
-
-	if (nRodNodes > 0) {
-		/* connect the block node to the Rod nodes */
-		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
-		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
-		for (i = 0; i < nRodNodes; i++) {
-			RF_ASSERT(rodNodes[i].numAntecedents == 1);
-			blockNode->succedents[i] = &rodNodes[i];
-			rodNodes[i].antecedents[0] = blockNode;
-			rodNodes[i].antType[0] = rf_control;
-
-			/* connect the Rod nodes to the Xor node */
-			RF_ASSERT(rodNodes[i].numSuccedents == 1);
-			rodNodes[i].succedents[0] = xorNode;
-			xorNode->antecedents[i] = &rodNodes[i];
-			xorNode->antType[i] = rf_trueData;
-		}
-	} else {
-		/* connect the block node to the Xor node */
-		RF_ASSERT(blockNode->numSuccedents == 1);
-		RF_ASSERT(xorNode->numAntecedents == 1);
-		blockNode->succedents[0] = xorNode;
-		xorNode->antecedents[0] = blockNode;
-		xorNode->antType[0] = rf_control;
-	}
-
-	/* connect the xor node to the commit node */
-	RF_ASSERT(xorNode->numSuccedents == 1);
-	RF_ASSERT(commitNode->numAntecedents == 1);
-	xorNode->succedents[0] = commitNode;
-	commitNode->antecedents[0] = xorNode;
-	commitNode->antType[0] = rf_control;
-
-	/* connect the commit node to the write nodes */
-	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
-	for (i = 0; i < nWndNodes; i++) {
-		/* check the node being wired, not just the first one */
-		RF_ASSERT(wndNodes[i].numAntecedents == 1);
-		commitNode->succedents[i] = &wndNodes[i];
-		wndNodes[i].antecedents[0] = commitNode;
-		wndNodes[i].antType[0] = rf_control;
-	}
-	RF_ASSERT(wnpNode->numAntecedents == 1);
-	commitNode->succedents[nWndNodes] = wnpNode;
-	wnpNode->antecedents[0] = commitNode;
-	wnpNode->antType[0] = rf_trueData;
-	if (nfaults == 2) {
-		RF_ASSERT(wnqNode->numAntecedents == 1);
-		commitNode->succedents[nWndNodes + 1] = wnqNode;
-		wnqNode->antecedents[0] = commitNode;
-		wnqNode->antType[0] = rf_trueData;
-	}
-	/* connect the write nodes to the term node */
-	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
-	RF_ASSERT(termNode->numSuccedents == 0);
-	for (i = 0; i < nWndNodes; i++) {
-		/* check the node being wired, not just the first one */
-		RF_ASSERT(wndNodes[i].numSuccedents == 1);
-		wndNodes[i].succedents[0] = termNode;
-		termNode->antecedents[i] = &wndNodes[i];
-		termNode->antType[i] = rf_control;
-	}
-	RF_ASSERT(wnpNode->numSuccedents == 1);
-	wnpNode->succedents[0] = termNode;
-	termNode->antecedents[nWndNodes] = wnpNode;
-	termNode->antType[nWndNodes] = rf_control;
-	if (nfaults == 2) {
-		RF_ASSERT(wnqNode->numSuccedents == 1);
-		wnqNode->succedents[0] = termNode;
-		termNode->antecedents[nWndNodes + 1] = wnqNode;
-		termNode->antType[nWndNodes + 1] = rf_control;
-	}
-}
-/******************************************************************************
- *
- * creates a DAG to perform a small-write operation (either raid 5 or pq),
- * which is as follows:
- *
- * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
- * \- Rod X / \----> Wnd [Und]-/
- * [\- Rod X / \---> Wnd [Und]-/]
- * [\- Roq -> Q / \--> Wnq [Unq]-/]
- *
- * Rop = read old parity
- * Rod = read old data
- * Roq = read old "q"
- * Cmt = commit node
- * Und = unlock data disk
- * Unp = unlock parity disk
- * Unq = unlock q disk
- * Wnp = write new parity
- * Wnd = write new data
- * Wnq = write new "q"
- * [ ] denotes optional segments in the graph
- *
- * Parameters: raidPtr - description of the physical array
- * asmap - logical & physical addresses for this access
- * bp - buffer ptr (holds write data)
- * flags - general flags (e.g. disk locking)
- * allocList - list of memory allocated in DAG creation
- * pfuncs - list of parity generating functions
- * qfuncs - list of q generating functions
- *
- * A null qfuncs indicates single fault tolerant
- *****************************************************************************/
-
-void
-rf_CommonCreateSmallWriteDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs,
- RF_RedFuncs_t * qfuncs)
-{
- RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
- RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes;
- RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode, *nodes;
- RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
- int i, j, nNodes, totalNumNodes, lu_flag;
- RF_ReconUnitNum_t which_ru;
- int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
- int (*qfunc) (RF_DagNode_t *);
- int numDataNodes, numParityNodes;
- RF_StripeNum_t parityStripeID;
- RF_PhysDiskAddr_t *pda;
- char *name, *qname;
- long nfaults;
-
- nfaults = qfuncs ? 2 : 1;
- lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */
-
- parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
- asmap->raidAddress, &which_ru);
- pda = asmap->physInfo;
- numDataNodes = asmap->numStripeUnitsAccessed;
- numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
-
- if (rf_dagDebug) {
- printf("[Creating small-write DAG]\n");
- }
- RF_ASSERT(numDataNodes > 0);
- dag_h->creator = "SmallWriteDAG";
-
- dag_h->numCommitNodes = 1;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- /*
- * DAG creation occurs in four steps:
- * 1. count the number of nodes in the DAG
- * 2. create the nodes
- * 3. initialize the nodes
- * 4. connect the nodes
- */
-
- /*
- * Step 1. compute number of nodes in the graph
- */
-
- /* number of nodes: a read and write for each data unit a redundancy
- * computation node for each parity node (nfaults * nparity) a read
- * and write for each parity unit a block and commit node (2) a
- * terminate node if atomic RMW an unlock node for each data unit,
- * redundancy unit */
- totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
- + (nfaults * 2 * numParityNodes) + 3;
- if (lu_flag) {
- totalNumNodes += (numDataNodes + (nfaults * numParityNodes));
- }
- /*
- * Step 2. create the nodes
- */
- RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
- (RF_DagNode_t *), allocList);
- i = 0;
- blockNode = &nodes[i];
- i += 1;
- commitNode = &nodes[i];
- i += 1;
- readDataNodes = &nodes[i];
- i += numDataNodes;
- readParityNodes = &nodes[i];
- i += numParityNodes;
- writeDataNodes = &nodes[i];
- i += numDataNodes;
- writeParityNodes = &nodes[i];
- i += numParityNodes;
- xorNodes = &nodes[i];
- i += numParityNodes;
- termNode = &nodes[i];
- i += 1;
- if (lu_flag) {
- unlockDataNodes = &nodes[i];
- i += numDataNodes;
- unlockParityNodes = &nodes[i];
- i += numParityNodes;
- } else {
- unlockDataNodes = unlockParityNodes = NULL;
- }
- if (nfaults == 2) {
- readQNodes = &nodes[i];
- i += numParityNodes;
- writeQNodes = &nodes[i];
- i += numParityNodes;
- qNodes = &nodes[i];
- i += numParityNodes;
- if (lu_flag) {
- unlockQNodes = &nodes[i];
- i += numParityNodes;
- } else {
- unlockQNodes = NULL;
- }
- } else {
- readQNodes = writeQNodes = qNodes = unlockQNodes = NULL;
- }
- RF_ASSERT(i == totalNumNodes);
-
- /*
- * Step 3. initialize the nodes
- */
- /* initialize block node (Nil) */
- nNodes = numDataNodes + (nfaults * numParityNodes);
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList);
-
- /* initialize commit node (Cmt) */
- rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
- NULL, nNodes, (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
-
- /* initialize terminate node (Trm) */
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
- NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList);
-
- /* initialize nodes which read old data (Rod) */
- for (i = 0; i < numDataNodes; i++) {
- rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, (nfaults * numParityNodes), 1, 4, 0, dag_h,
- "Rod", allocList);
- RF_ASSERT(pda != NULL);
- /* physical disk addr desc */
- readDataNodes[i].params[0].p = pda;
- /* buffer to hold old data */
- readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr,
- dag_h, pda, allocList);
- readDataNodes[i].params[2].v = parityStripeID;
- readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- lu_flag, 0, which_ru);
- pda = pda->next;
- for (j = 0; j < readDataNodes[i].numSuccedents; j++) {
- readDataNodes[i].propList[j] = NULL;
- }
- }
-
- /* initialize nodes which read old parity (Rop) */
- pda = asmap->parityInfo;
- i = 0;
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
- rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4,
- 0, dag_h, "Rop", allocList);
- readParityNodes[i].params[0].p = pda;
- /* buffer to hold old parity */
- readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr,
- dag_h, pda, allocList);
- readParityNodes[i].params[2].v = parityStripeID;
- readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- lu_flag, 0, which_ru);
- pda = pda->next;
- for (j = 0; j < readParityNodes[i].numSuccedents; j++) {
- readParityNodes[i].propList[0] = NULL;
- }
- }
-
- /* initialize nodes which read old Q (Roq) */
- if (nfaults == 2) {
- pda = asmap->qInfo;
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList);
- readQNodes[i].params[0].p = pda;
- /* buffer to hold old Q */
- readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda,
- allocList);
- readQNodes[i].params[2].v = parityStripeID;
- readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- lu_flag, 0, which_ru);
- pda = pda->next;
- for (j = 0; j < readQNodes[i].numSuccedents; j++) {
- readQNodes[i].propList[0] = NULL;
- }
- }
- }
- /* initialize nodes which write new data (Wnd) */
- pda = asmap->physInfo;
- for (i = 0; i < numDataNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc,
- rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
- "Wnd", allocList);
- /* physical disk addr desc */
- writeDataNodes[i].params[0].p = pda;
- /* buffer holding new data to be written */
- writeDataNodes[i].params[1].p = pda->bufPtr;
- writeDataNodes[i].params[2].v = parityStripeID;
- writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, 0, which_ru);
- if (lu_flag) {
- /* initialize node to unlock the disk queue */
- rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc,
- rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h,
- "Und", allocList);
- /* physical disk addr desc */
- unlockDataNodes[i].params[0].p = pda;
- unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, lu_flag, which_ru);
- }
- pda = pda->next;
- }
-
- /*
- * Initialize nodes which compute new parity and Q.
- */
- /*
- * We use the simple XOR func in the double-XOR case, and when
- * we're accessing only a portion of one stripe unit. The distinction
- * between the two is that the regular XOR func assumes that the targbuf
- * is a full SU in size, and examines the pda associated with the buffer
- * to decide where within the buffer to XOR the data, whereas
- * the simple XOR func just XORs the data into the start of the buffer.
- */
- if ((numParityNodes == 2) || ((numDataNodes == 1)
- && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) {
- func = pfuncs->simple;
- undoFunc = rf_NullNodeUndoFunc;
- name = pfuncs->SimpleName;
- if (qfuncs) {
- qfunc = qfuncs->simple;
- qname = qfuncs->SimpleName;
- } else {
- qfunc = NULL;
- qname = NULL;
- }
- } else {
- func = pfuncs->regular;
- undoFunc = rf_NullNodeUndoFunc;
- name = pfuncs->RegularName;
- if (qfuncs) {
- qfunc = qfuncs->regular;
- qname = qfuncs->RegularName;
- } else {
- qfunc = NULL;
- qname = NULL;
- }
- }
- /*
- * Initialize the xor nodes: params are {pda,buf}
- * from {Rod,Wnd,Rop} nodes, and raidPtr
- */
- if (numParityNodes == 2) {
- /* double-xor case */
- for (i = 0; i < numParityNodes; i++) {
- /* note: no wakeup func for xor */
- rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL,
- 1, (numDataNodes + numParityNodes), 7, 1, dag_h, name, allocList);
- xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
- xorNodes[i].params[0] = readDataNodes[i].params[0];
- xorNodes[i].params[1] = readDataNodes[i].params[1];
- xorNodes[i].params[2] = readParityNodes[i].params[0];
- xorNodes[i].params[3] = readParityNodes[i].params[1];
- xorNodes[i].params[4] = writeDataNodes[i].params[0];
- xorNodes[i].params[5] = writeDataNodes[i].params[1];
- xorNodes[i].params[6].p = raidPtr;
- /* use old parity buf as target buf */
- xorNodes[i].results[0] = readParityNodes[i].params[1].p;
- if (nfaults == 2) {
- /* note: no wakeup func for qor */
- rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1,
- (numDataNodes + numParityNodes), 7, 1, dag_h, qname, allocList);
- qNodes[i].params[0] = readDataNodes[i].params[0];
- qNodes[i].params[1] = readDataNodes[i].params[1];
- qNodes[i].params[2] = readQNodes[i].params[0];
- qNodes[i].params[3] = readQNodes[i].params[1];
- qNodes[i].params[4] = writeDataNodes[i].params[0];
- qNodes[i].params[5] = writeDataNodes[i].params[1];
- qNodes[i].params[6].p = raidPtr;
- /* use old Q buf as target buf */
- qNodes[i].results[0] = readQNodes[i].params[1].p;
- }
- }
- } else {
- /* there is only one xor node in this case */
- rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, 1,
- (numDataNodes + numParityNodes),
- (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList);
- xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
- for (i = 0; i < numDataNodes + 1; i++) {
- /* set up params related to Rod and Rop nodes */
- xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */
- xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */
- }
- for (i = 0; i < numDataNodes; i++) {
- /* set up params related to Wnd and Wnp nodes */
- xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
- writeDataNodes[i].params[0];
- xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
- writeDataNodes[i].params[1];
- }
- /* xor node needs to get at RAID information */
- xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
- xorNodes[0].results[0] = readParityNodes[0].params[1].p;
- if (nfaults == 2) {
- rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1,
- (numDataNodes + numParityNodes),
- (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h,
- qname, allocList);
- for (i = 0; i < numDataNodes; i++) {
- /* set up params related to Rod */
- qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */
- qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */
- }
- /* and read old q */
- qNodes[0].params[2 * numDataNodes + 0] = /* pda */
- readQNodes[0].params[0];
- qNodes[0].params[2 * numDataNodes + 1] = /* buffer ptr */
- readQNodes[0].params[1];
- for (i = 0; i < numDataNodes; i++) {
- /* set up params related to Wnd nodes */
- qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
- writeDataNodes[i].params[0];
- qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
- writeDataNodes[i].params[1];
- }
- /* xor node needs to get at RAID information */
- qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
- qNodes[0].results[0] = readQNodes[0].params[1].p;
- }
- }
-
- /* initialize nodes which write new parity (Wnp) */
- pda = asmap->parityInfo;
- for (i = 0; i < numParityNodes; i++) {
- rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc,
- rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
- "Wnp", allocList);
- RF_ASSERT(pda != NULL);
- writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr)
- * filled in by xor node */
- writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for
- * parity write
- * operation */
- writeParityNodes[i].params[2].v = parityStripeID;
- writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, 0, which_ru);
- if (lu_flag) {
- /* initialize node to unlock the disk queue */
- rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc,
- rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h,
- "Unp", allocList);
- unlockParityNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, lu_flag, which_ru);
- }
- pda = pda->next;
- }
-
- /* initialize nodes which write new Q (Wnq) */
- if (nfaults == 2) {
- pda = asmap->qInfo;
- for (i = 0; i < numParityNodes; i++) {
- rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc,
- rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
- "Wnq", allocList);
- RF_ASSERT(pda != NULL);
- writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr)
- * filled in by xor node */
- writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for
- * parity write
- * operation */
- writeQNodes[i].params[2].v = parityStripeID;
- writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, 0, which_ru);
- if (lu_flag) {
- /* initialize node to unlock the disk queue */
- rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc,
- rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h,
- "Unq", allocList);
- unlockQNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, lu_flag, which_ru);
- }
- pda = pda->next;
- }
- }
- /*
- * Step 4. connect the nodes.
- */
-
- /* connect header to block node */
- dag_h->succedents[0] = blockNode;
-
- /* connect block node to read old data nodes */
- RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
- for (i = 0; i < numDataNodes; i++) {
- blockNode->succedents[i] = &readDataNodes[i];
- RF_ASSERT(readDataNodes[i].numAntecedents == 1);
- readDataNodes[i].antecedents[0] = blockNode;
- readDataNodes[i].antType[0] = rf_control;
- }
-
- /* connect block node to read old parity nodes */
- for (i = 0; i < numParityNodes; i++) {
- blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
- RF_ASSERT(readParityNodes[i].numAntecedents == 1);
- readParityNodes[i].antecedents[0] = blockNode;
- readParityNodes[i].antType[0] = rf_control;
- }
-
- /* connect block node to read old Q nodes */
- if (nfaults == 2) {
- for (i = 0; i < numParityNodes; i++) {
- blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i];
- RF_ASSERT(readQNodes[i].numAntecedents == 1);
- readQNodes[i].antecedents[0] = blockNode;
- readQNodes[i].antType[0] = rf_control;
- }
- }
- /* connect read old data nodes to xor nodes */
- for (i = 0; i < numDataNodes; i++) {
- RF_ASSERT(readDataNodes[i].numSuccedents == (nfaults * numParityNodes));
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes);
- readDataNodes[i].succedents[j] = &xorNodes[j];
- xorNodes[j].antecedents[i] = &readDataNodes[i];
- xorNodes[j].antType[i] = rf_trueData;
- }
- }
-
- /* connect read old data nodes to q nodes */
- if (nfaults == 2) {
- for (i = 0; i < numDataNodes; i++) {
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes);
- readDataNodes[i].succedents[numParityNodes + j] = &qNodes[j];
- qNodes[j].antecedents[i] = &readDataNodes[i];
- qNodes[j].antType[i] = rf_trueData;
- }
- }
- }
- /* connect read old parity nodes to xor nodes */
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes);
- for (j = 0; j < numParityNodes; j++) {
- readParityNodes[i].succedents[j] = &xorNodes[j];
- xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
- xorNodes[j].antType[numDataNodes + i] = rf_trueData;
- }
- }
-
- /* connect read old q nodes to q nodes */
- if (nfaults == 2) {
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes);
- for (j = 0; j < numParityNodes; j++) {
- readQNodes[i].succedents[j] = &qNodes[j];
- qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i];
- qNodes[j].antType[numDataNodes + i] = rf_trueData;
- }
- }
- }
- /* connect xor nodes to commit node */
- RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(xorNodes[i].numSuccedents == 1);
- xorNodes[i].succedents[0] = commitNode;
- commitNode->antecedents[i] = &xorNodes[i];
- commitNode->antType[i] = rf_control;
- }
-
- /* connect q nodes to commit node */
- if (nfaults == 2) {
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(qNodes[i].numSuccedents == 1);
- qNodes[i].succedents[0] = commitNode;
- commitNode->antecedents[i + numParityNodes] = &qNodes[i];
- commitNode->antType[i + numParityNodes] = rf_control;
- }
- }
- /* connect commit node to write nodes */
- RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
- for (i = 0; i < numDataNodes; i++) {
- RF_ASSERT(writeDataNodes[i].numAntecedents == 1);
- commitNode->succedents[i] = &writeDataNodes[i];
- writeDataNodes[i].antecedents[0] = commitNode;
- writeDataNodes[i].antType[0] = rf_trueData;
- }
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(writeParityNodes[i].numAntecedents == 1);
- commitNode->succedents[i + numDataNodes] = &writeParityNodes[i];
- writeParityNodes[i].antecedents[0] = commitNode;
- writeParityNodes[i].antType[0] = rf_trueData;
- }
- if (nfaults == 2) {
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(writeQNodes[i].numAntecedents == 1);
- commitNode->succedents[i + numDataNodes + numParityNodes] = &writeQNodes[i];
- writeQNodes[i].antecedents[0] = commitNode;
- writeQNodes[i].antType[0] = rf_trueData;
- }
- }
- RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
- RF_ASSERT(termNode->numSuccedents == 0);
- for (i = 0; i < numDataNodes; i++) {
- if (lu_flag) {
- /* connect write new data nodes to unlock nodes */
- RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
- RF_ASSERT(unlockDataNodes[i].numAntecedents == 1);
- writeDataNodes[i].succedents[0] = &unlockDataNodes[i];
- unlockDataNodes[i].antecedents[0] = &writeDataNodes[i];
- unlockDataNodes[i].antType[0] = rf_control;
-
- /* connect unlock nodes to term node */
- RF_ASSERT(unlockDataNodes[i].numSuccedents == 1);
- unlockDataNodes[i].succedents[0] = termNode;
- termNode->antecedents[i] = &unlockDataNodes[i];
- termNode->antType[i] = rf_control;
- } else {
- /* connect write new data nodes to term node */
- RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
- writeDataNodes[i].succedents[0] = termNode;
- termNode->antecedents[i] = &writeDataNodes[i];
- termNode->antType[i] = rf_control;
- }
- }
-
- for (i = 0; i < numParityNodes; i++) {
- if (lu_flag) {
- /* connect write new parity nodes to unlock nodes */
- RF_ASSERT(writeParityNodes[i].numSuccedents == 1);
- RF_ASSERT(unlockParityNodes[i].numAntecedents == 1);
- writeParityNodes[i].succedents[0] = &unlockParityNodes[i];
- unlockParityNodes[i].antecedents[0] = &writeParityNodes[i];
- unlockParityNodes[i].antType[0] = rf_control;
-
- /* connect unlock nodes to term node */
- RF_ASSERT(unlockParityNodes[i].numSuccedents == 1);
- unlockParityNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i];
- termNode->antType[numDataNodes + i] = rf_control;
- } else {
- RF_ASSERT(writeParityNodes[i].numSuccedents == 1);
- writeParityNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + i] = &writeParityNodes[i];
- termNode->antType[numDataNodes + i] = rf_control;
- }
- }
-
- if (nfaults == 2) {
- for (i = 0; i < numParityNodes; i++) {
- if (lu_flag) {
- /* connect write new Q nodes to unlock nodes */
- RF_ASSERT(writeQNodes[i].numSuccedents == 1);
- RF_ASSERT(unlockQNodes[i].numAntecedents == 1);
- writeQNodes[i].succedents[0] = &unlockQNodes[i];
- unlockQNodes[i].antecedents[0] = &writeQNodes[i];
- unlockQNodes[i].antType[0] = rf_control;
-
- /* connect unlock nodes to unblock node */
- RF_ASSERT(unlockQNodes[i].numSuccedents == 1);
- unlockQNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i];
- termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
- } else {
- RF_ASSERT(writeQNodes[i].numSuccedents == 1);
- writeQNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i];
- termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
- }
- }
- }
-}
-
-
-/******************************************************************************
- * create a write graph (fault-free or degraded) for RAID level 1
- *
- * Hdr -> Commit -> Wpd -> Nil -> Trm
- * -> Wsd ->
- *
- * The "Wpd" node writes data to the primary copy in the mirror pair
- * The "Wsd" node writes data to the secondary copy in the mirror pair
- *
- * Parameters: raidPtr - description of the physical array
- * asmap - logical & physical addresses for this access
- * bp - buffer ptr (holds write data)
- * flags - general flags (e.g. disk locking)
- * allocList - list of memory allocated in DAG creation
- *****************************************************************************/
-
-void
-rf_CreateRaidOneWriteDAG(
-    RF_Raid_t * raidPtr,
-    RF_AccessStripeMap_t * asmap,
-    RF_DagHeader_t * dag_h,
-    void *bp,
-    RF_RaidAccessFlags_t flags,
-    RF_AllocListElem_t * allocList)
-{
-	/*
-	 * Builds Hdr -> Cmt -> {Wpd..., Wsd...} -> Nil -> Trm.
-	 *
-	 * All nodes live in one calloc'ed pool laid out as
-	 *   [ primary writes | mirror writes | Cmt | Nil | Trm ]
-	 * so the two groups of write nodes are contiguous and can be wired
-	 * with a single pass over the first numWrites pool entries.
-	 * bp and flags are accepted for interface compatibility and are not
-	 * referenced here.
-	 */
-	RF_DagNode_t *nodePool, *primaryWr, *mirrorWr, *wr;
-	RF_DagNode_t *cmtNode, *nilNode, *trmNode;
-	RF_PhysDiskAddr_t *dataPda, *mirrorPda;
-	RF_ReconUnitNum_t reconUnit;
-	RF_StripeNum_t stripeID;
-	int numPrimary, numMirror, numWrites, k;
-
-	stripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
-	    asmap->raidAddress, &reconUnit);
-	if (rf_dagDebug)
-		printf("[Creating RAID level 1 write DAG]\n");
-	dag_h->creator = "RaidOneWriteDAG";
-
-	/* a second pda in either list means the access is not SU aligned */
-	numMirror = 1;
-	if (asmap->parityInfo->next)
-		numMirror = 2;
-	numPrimary = 1;
-	if (asmap->physInfo->next)
-		numPrimary = 2;
-
-	/* a failed copy gets one write node fewer */
-	if (asmap->numDataFailed == 1)
-		numPrimary -= 1;
-	if (asmap->numParityFailed == 1)
-		numMirror -= 1;
-	numWrites = numPrimary + numMirror;
-
-	/* pool size = write nodes + (commit + unblock + terminator) */
-	RF_CallocAndAdd(nodePool, numWrites + 3, sizeof(RF_DagNode_t),
-	    (RF_DagNode_t *), allocList);
-	primaryWr = nodePool;
-	mirrorWr = primaryWr + numPrimary;
-	cmtNode = mirrorWr + numMirror;
-	nilNode = cmtNode + 1;
-	trmNode = nilNode + 1;
-	RF_ASSERT(((trmNode + 1) - nodePool) == (numWrites + 3));
-
-	/* this dag can commit immediately */
-	dag_h->numCommitNodes = 1;
-	dag_h->numCommits = 0;
-	dag_h->numSuccedents = 1;
-
-	/* commit, unblock (Nil) and terminator nodes */
-	rf_InitNode(cmtNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
-	    NULL, numWrites, 0, 0, 0, dag_h, "Cmt", allocList);
-	rf_InitNode(nilNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
-	    NULL, 1, numWrites, 0, 0, dag_h, "Nil", allocList);
-	rf_InitNode(trmNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
-	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-
-	/* primary-copy writes: one node per pda on asmap->physInfo */
-	if (numPrimary > 0) {
-		dataPda = asmap->physInfo;
-		for (k = 0; k < numPrimary; k++, dataPda = dataPda->next) {
-			wr = primaryWr + k;
-			rf_InitNode(wr, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
-			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList);
-			RF_ASSERT(dataPda != NULL);
-			wr->params[0].p = dataPda;
-			wr->params[1].p = dataPda->bufPtr;
-			wr->params[2].v = stripeID;
-			wr->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, reconUnit);
-		}
-		RF_ASSERT(dataPda == NULL);
-	}
-	/*
-	 * secondary-copy writes: target address comes from the parityInfo
-	 * list, the data buffer from the matching physInfo entry
-	 */
-	if (numMirror > 0) {
-		dataPda = asmap->physInfo;
-		mirrorPda = asmap->parityInfo;
-		for (k = 0; k < numMirror; k++) {
-			wr = mirrorWr + k;
-			rf_InitNode(wr, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
-			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList);
-			RF_ASSERT(dataPda != NULL);
-			wr->params[0].p = mirrorPda;
-			wr->params[1].p = dataPda->bufPtr;
-			wr->params[2].v = stripeID;
-			wr->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, reconUnit);
-			dataPda = dataPda->next;
-			mirrorPda = mirrorPda->next;
-		}
-		RF_ASSERT(dataPda == NULL);
-		RF_ASSERT(mirrorPda == NULL);
-	}
-
-	/* header -> commit */
-	RF_ASSERT(dag_h->numSuccedents == 1);
-	RF_ASSERT(cmtNode->numAntecedents == 0);
-	dag_h->succedents[0] = cmtNode;
-
-	/*
-	 * commit -> write nodes.  Pool entries [0, numWrites) are the primary
-	 * writes followed by the mirror writes, so slot k of the commit node
-	 * is simply pool entry k.
-	 */
-	RF_ASSERT(cmtNode->numSuccedents == numWrites);
-	for (k = 0; k < numWrites; k++) {
-		wr = nodePool + k;
-		RF_ASSERT(wr->numAntecedents == 1);
-		cmtNode->succedents[k] = wr;
-		wr->antecedents[0] = cmtNode;
-		wr->antType[0] = rf_control;
-	}
-
-	/* write nodes -> unblock node, same slot numbering as above */
-	RF_ASSERT(nilNode->numAntecedents == numWrites);
-	for (k = 0; k < numWrites; k++) {
-		wr = nodePool + k;
-		RF_ASSERT(wr->numSuccedents == 1);
-		wr->succedents[0] = nilNode;
-		nilNode->antecedents[k] = wr;
-		nilNode->antType[k] = rf_control;
-	}
-
-	/* unblock -> terminator */
-	RF_ASSERT(nilNode->numSuccedents == 1);
-	RF_ASSERT(trmNode->numAntecedents == 1);
-	RF_ASSERT(trmNode->numSuccedents == 0);
-	nilNode->succedents[0] = trmNode;
-	trmNode->antecedents[0] = nilNode;
-	trmNode->antType[0] = rf_control;
-}
-
-
-
-/* DAGs which have no commit points.
- *
- * The following DAGs are used in forward and backward error recovery experiments.
- * They are identical to the DAGs above this comment with the exception that
- * the commit points have been removed.
- */
-
-
-
-/*
- * Build the commit-free ("Fwd") large-write DAG for one stripe access.
- *
- * Graph produced ([ ] = only when nfaults == 2):
- *
- *   Hdr -> Nil(block) -> Rod ... -> Nil(sync) -> Wnd ... ----------> Trm
- *                                             \-> Xr --> Wnp ------/
- *                                                    [\-> Wnq ----/]
- *
- * If mapping the unaccessed portion of the stripe yields no Rod nodes,
- * the block node is wired directly to the sync node.
- *
- * Parameters: raidPtr   - description of the physical array
- *             asmap     - logical & physical addresses for this access
- *             dag_h     - DAG header to fill in
- *             bp        - not referenced by this function
- *             flags     - not referenced by this function
- *             allocList - list of memory allocated in DAG creation
- *             nfaults   - 1 writes parity only; 2 also writes Q
- *             redFunc   - function executed by the redundancy (Xr) node
- *             allowBufferRecycle - nonzero: reuse the buffer of a Rod node
- *                         that reads a full stripe unit as the parity
- *                         target instead of allocating a new one
- */
-void
-rf_CommonCreateLargeWriteDAGFwd(
-    RF_Raid_t * raidPtr,
-    RF_AccessStripeMap_t * asmap,
-    RF_DagHeader_t * dag_h,
-    void *bp,
-    RF_RaidAccessFlags_t flags,
-    RF_AllocListElem_t * allocList,
-    int nfaults,
-    int (*redFunc) (RF_DagNode_t *),
-    int allowBufferRecycle)
-{
-	RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode;
-	RF_DagNode_t *wnqNode, *blockNode, *syncNode, *termNode;
-	int nWndNodes, nRodNodes, i, nodeNum, asmNum;
-	RF_AccessStripeMapHeader_t *new_asm_h[2];
-	RF_StripeNum_t parityStripeID;
-	char *sosBuffer, *eosBuffer;
-	RF_ReconUnitNum_t which_ru;
-	RF_RaidLayout_t *layoutPtr;
-	RF_PhysDiskAddr_t *pda;
-
-	layoutPtr = &(raidPtr->Layout);
-	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
-
-	if (rf_dagDebug)
-		printf("[Creating large-write DAG]\n");
-	dag_h->creator = "LargeWriteDAGFwd";
-
-	/* this variant of the graph has no commit node */
-	dag_h->numCommitNodes = 0;
-	dag_h->numCommits = 0;
-	dag_h->numSuccedents = 1;
-
-	/* alloc the nodes: Wnd, xor, Wnp, block, sync, term, and (if
-	 * nfaults == 2) Wnq -- nWndNodes + 5 nodes plus one more for Q,
-	 * i.e. nWndNodes + 4 + nfaults */
-	nWndNodes = asmap->numStripeUnitsAccessed;
-	RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
-	i = 0;
-	wndNodes = &nodes[i];
-	i += nWndNodes;
-	xorNode = &nodes[i];
-	i += 1;
-	wnpNode = &nodes[i];
-	i += 1;
-	blockNode = &nodes[i];
-	i += 1;
-	syncNode = &nodes[i];
-	i += 1;
-	termNode = &nodes[i];
-	i += 1;
-	if (nfaults == 2) {
-		wnqNode = &nodes[i];
-		i += 1;
-	} else {
-		wnqNode = NULL;
-	}
-	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList);
-	if (nRodNodes > 0) {
-		RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
-	} else {
-		rodNodes = NULL;
-	}
-
-	/* begin node initialization */
-	if (nRodNodes > 0) {
-		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList);
-		rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes, 0, 0, dag_h, "Nil", allocList);
-	} else {
-		/* no Rod nodes: block feeds sync directly */
-		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
-		rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, 1, 0, 0, dag_h, "Nil", allocList);
-	}
-
-	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList);
-
-	/* initialize the Rod nodes */
-	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
-		if (new_asm_h[asmNum]) {
-			pda = new_asm_h[asmNum]->stripeMap->physInfo;
-			while (pda) {
-				rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList);
-				rodNodes[nodeNum].params[0].p = pda;
-				rodNodes[nodeNum].params[1].p = pda->bufPtr;
-				rodNodes[nodeNum].params[2].v = parityStripeID;
-				rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-				nodeNum++;
-				pda = pda->next;
-			}
-		}
-	}
-	RF_ASSERT(nodeNum == nRodNodes);
-
-	/* initialize the wnd nodes */
-	pda = asmap->physInfo;
-	for (i = 0; i < nWndNodes; i++) {
-		rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
-		RF_ASSERT(pda != NULL);
-		wndNodes[i].params[0].p = pda;
-		wndNodes[i].params[1].p = pda->bufPtr;
-		wndNodes[i].params[2].v = parityStripeID;
-		wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-		pda = pda->next;
-	}
-
-	/* initialize the redundancy node.  It has nfaults succedents (Wnp
-	 * and, for nfaults == 2, Wnq) and a single antecedent (the sync
-	 * node); the counts were previously passed the other way round,
-	 * which left succedents[1] out of range when nfaults == 2. */
-	rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, nfaults, 1, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList);
-	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
-	for (i = 0; i < nWndNodes; i++) {
-		xorNode->params[2 * i + 0] = wndNodes[i].params[0];	/* pda */
-		xorNode->params[2 * i + 1] = wndNodes[i].params[1];	/* buf ptr */
-	}
-	for (i = 0; i < nRodNodes; i++) {
-		xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0];	/* pda */
-		xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1];	/* buf ptr */
-	}
-	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;	/* xor node needs to get
-									 * at RAID information */
-
-	/* look for an Rod node that reads a complete SU. If none, alloc a
-	 * buffer to receive the parity info. Note that we can't use a new
-	 * data buffer because it will not have gotten written when the xor
-	 * occurs. */
-	if (allowBufferRecycle) {
-		for (i = 0; i < nRodNodes; i++)
-			if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
-				break;
-	}
-	/* i is only examined when the search above actually ran */
-	if ((!allowBufferRecycle) || (i == nRodNodes)) {
-		RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList);
-	} else
-		xorNode->results[0] = rodNodes[i].params[1].p;
-
-	/* initialize the Wnp node */
-	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList);
-	wnpNode->params[0].p = asmap->parityInfo;
-	wnpNode->params[1].p = xorNode->results[0];
-	wnpNode->params[2].v = parityStripeID;
-	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-	RF_ASSERT(asmap->parityInfo->next == NULL);	/* parityInfo must
-							 * describe entire
-							 * parity unit */
-
-	if (nfaults == 2) {
-		/* we never try to recycle a buffer for the Q calculation in
-		 * addition to the parity. This would cause two buffers to get
-		 * smashed during the P and Q calculation, guaranteeing one
-		 * would be wrong. */
-		RF_CallocAndAdd(xorNode->results[1], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList);
-		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList);
-		wnqNode->params[0].p = asmap->qInfo;
-		wnqNode->params[1].p = xorNode->results[1];
-		wnqNode->params[2].v = parityStripeID;
-		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-		RF_ASSERT(asmap->qInfo->next == NULL);	/* qInfo must describe
-							 * entire Q unit; the
-							 * parityInfo check was
-							 * already made above */
-	}
-	/* connect nodes to form graph */
-
-	/* connect dag header to block node */
-	RF_ASSERT(blockNode->numAntecedents == 0);
-	dag_h->succedents[0] = blockNode;
-
-	if (nRodNodes > 0) {
-		/* connect the block node to the Rod nodes */
-		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
-		RF_ASSERT(syncNode->numAntecedents == nRodNodes);
-		for (i = 0; i < nRodNodes; i++) {
-			RF_ASSERT(rodNodes[i].numAntecedents == 1);
-			blockNode->succedents[i] = &rodNodes[i];
-			rodNodes[i].antecedents[0] = blockNode;
-			rodNodes[i].antType[0] = rf_control;
-
-			/* connect the Rod nodes to the Nil node */
-			RF_ASSERT(rodNodes[i].numSuccedents == 1);
-			rodNodes[i].succedents[0] = syncNode;
-			syncNode->antecedents[i] = &rodNodes[i];
-			syncNode->antType[i] = rf_trueData;
-		}
-	} else {
-		/* connect the block node to the Nil node */
-		RF_ASSERT(blockNode->numSuccedents == 1);
-		RF_ASSERT(syncNode->numAntecedents == 1);
-		blockNode->succedents[0] = syncNode;
-		syncNode->antecedents[0] = blockNode;
-		syncNode->antType[0] = rf_control;
-	}
-
-	/* connect the sync node to the Wnd nodes */
-	RF_ASSERT(syncNode->numSuccedents == (1 + nWndNodes));
-	for (i = 0; i < nWndNodes; i++) {
-		/* check the node being wired, not just element 0 */
-		RF_ASSERT(wndNodes[i].numAntecedents == 1);
-		syncNode->succedents[i] = &wndNodes[i];
-		wndNodes[i].antecedents[0] = syncNode;
-		wndNodes[i].antType[0] = rf_control;
-	}
-
-	/* connect the sync node to the Xor node */
-	RF_ASSERT(xorNode->numAntecedents == 1);
-	syncNode->succedents[nWndNodes] = xorNode;
-	xorNode->antecedents[0] = syncNode;
-	xorNode->antType[0] = rf_control;
-
-	/* connect the xor node to the write parity node */
-	RF_ASSERT(xorNode->numSuccedents == nfaults);
-	RF_ASSERT(wnpNode->numAntecedents == 1);
-	xorNode->succedents[0] = wnpNode;
-	wnpNode->antecedents[0] = xorNode;
-	wnpNode->antType[0] = rf_trueData;
-	if (nfaults == 2) {
-		RF_ASSERT(wnqNode->numAntecedents == 1);
-		xorNode->succedents[1] = wnqNode;
-		wnqNode->antecedents[0] = xorNode;
-		wnqNode->antType[0] = rf_trueData;
-	}
-	/* connect the write nodes to the term node */
-	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
-	RF_ASSERT(termNode->numSuccedents == 0);
-	for (i = 0; i < nWndNodes; i++) {
-		/* check the node being wired, not just element 0 */
-		RF_ASSERT(wndNodes[i].numSuccedents == 1);
-		wndNodes[i].succedents[0] = termNode;
-		termNode->antecedents[i] = &wndNodes[i];
-		termNode->antType[i] = rf_control;
-	}
-	RF_ASSERT(wnpNode->numSuccedents == 1);
-	wnpNode->succedents[0] = termNode;
-	termNode->antecedents[nWndNodes] = wnpNode;
-	termNode->antType[nWndNodes] = rf_control;
-	if (nfaults == 2) {
-		RF_ASSERT(wnqNode->numSuccedents == 1);
-		wnqNode->succedents[0] = termNode;
-		termNode->antecedents[nWndNodes + 1] = wnqNode;
-		termNode->antType[nWndNodes + 1] = rf_control;
-	}
-}
-
-
-/******************************************************************************
- *
- * creates a DAG to perform a small-write operation (either raid 5 or pq),
- * which is as follows:
- *
- * Hdr -> Nil -> Rop - Xor - Wnp [Unp] -- Trm
- * \- Rod X- Wnd [Und] -------/
- * [\- Rod X- Wnd [Und] ------/]
- * [\- Roq - Q --> Wnq [Unq]-/]
- *
- * Rop = read old parity
- * Rod = read old data
- * Roq = read old "q"
- * Cmt = commit node (not present in this commit-free Fwd variant)
- * Und = unlock data disk
- * Unp = unlock parity disk
- * Unq = unlock q disk
- * Wnp = write new parity
- * Wnd = write new data
- * Wnq = write new "q"
- * [ ] denotes optional segments in the graph
- *
- * Parameters: raidPtr - description of the physical array
- * asmap - logical & physical addresses for this access
- * bp - buffer ptr (holds write data)
- * flags - general flags (e.g. disk locking)
- * allocList - list of memory allocated in DAG creation
- * pfuncs - list of parity generating functions
- * qfuncs - list of q generating functions
- *
- * A null qfuncs indicates single fault tolerant
- *****************************************************************************/
-
-void
-rf_CommonCreateSmallWriteDAGFwd(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs,
- RF_RedFuncs_t * qfuncs)
-{
- RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
- RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes;
- RF_DagNode_t *xorNodes, *qNodes, *blockNode, *nodes;
- RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
- int i, j, nNodes, totalNumNodes, lu_flag;
- RF_ReconUnitNum_t which_ru;
- int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
- int (*qfunc) (RF_DagNode_t *);
- int numDataNodes, numParityNodes;
- RF_StripeNum_t parityStripeID;
- RF_PhysDiskAddr_t *pda;
- char *name, *qname;
- long nfaults;
-
- nfaults = qfuncs ? 2 : 1;
- lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */
-
- parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
- pda = asmap->physInfo;
- numDataNodes = asmap->numStripeUnitsAccessed;
- numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
-
- if (rf_dagDebug)
- printf("[Creating small-write DAG]\n");
- RF_ASSERT(numDataNodes > 0);
- dag_h->creator = "SmallWriteDAGFwd";
-
- dag_h->numCommitNodes = 0;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- qfunc = NULL;
- qname = NULL;
-
- /* DAG creation occurs in four steps: 1. count the number of nodes in
- * the DAG 2. create the nodes 3. initialize the nodes 4. connect the
- * nodes */
-
- /* Step 1. compute number of nodes in the graph */
-
- /* number of nodes: a read and write for each data unit a redundancy
- * computation node for each parity node (nfaults * nparity) a read
- * and write for each parity unit a block node a terminate node if
- * atomic RMW an unlock node for each data unit, redundancy unit */
- totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + (nfaults * 2 * numParityNodes) + 2;
- if (lu_flag)
- totalNumNodes += (numDataNodes + (nfaults * numParityNodes));
-
-
- /* Step 2. create the nodes */
- RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
- i = 0;
- blockNode = &nodes[i];
- i += 1;
- readDataNodes = &nodes[i];
- i += numDataNodes;
- readParityNodes = &nodes[i];
- i += numParityNodes;
- writeDataNodes = &nodes[i];
- i += numDataNodes;
- writeParityNodes = &nodes[i];
- i += numParityNodes;
- xorNodes = &nodes[i];
- i += numParityNodes;
- termNode = &nodes[i];
- i += 1;
- if (lu_flag) {
- unlockDataNodes = &nodes[i];
- i += numDataNodes;
- unlockParityNodes = &nodes[i];
- i += numParityNodes;
- } else {
- unlockDataNodes = unlockParityNodes = NULL;
- }
- if (nfaults == 2) {
- readQNodes = &nodes[i];
- i += numParityNodes;
- writeQNodes = &nodes[i];
- i += numParityNodes;
- qNodes = &nodes[i];
- i += numParityNodes;
- if (lu_flag) {
- unlockQNodes = &nodes[i];
- i += numParityNodes;
- } else {
- unlockQNodes = NULL;
- }
- } else {
- readQNodes = writeQNodes = qNodes = unlockQNodes = NULL;
- }
- RF_ASSERT(i == totalNumNodes);
-
- /* Step 3. initialize the nodes */
- /* initialize block node (Nil) */
- nNodes = numDataNodes + (nfaults * numParityNodes);
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList);
-
- /* initialize terminate node (Trm) */
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList);
-
- /* initialize nodes which read old data (Rod) */
- for (i = 0; i < numDataNodes; i++) {
- rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, (numParityNodes * nfaults) + 1, 1, 4, 0, dag_h, "Rod", allocList);
- RF_ASSERT(pda != NULL);
- readDataNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old
- * data */
- readDataNodes[i].params[2].v = parityStripeID;
- readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru);
- pda = pda->next;
- for (j = 0; j < readDataNodes[i].numSuccedents; j++)
- readDataNodes[i].propList[j] = NULL;
- }
-
- /* initialize nodes which read old parity (Rop) */
- pda = asmap->parityInfo;
- i = 0;
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Rop", allocList);
- readParityNodes[i].params[0].p = pda;
- readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old
- * parity */
- readParityNodes[i].params[2].v = parityStripeID;
- readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru);
- for (j = 0; j < readParityNodes[i].numSuccedents; j++)
- readParityNodes[i].propList[0] = NULL;
- pda = pda->next;
- }
-
- /* initialize nodes which read old Q (Roq) */
- if (nfaults == 2) {
- pda = asmap->qInfo;
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList);
- readQNodes[i].params[0].p = pda;
- readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old Q */
- readQNodes[i].params[2].v = parityStripeID;
- readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru);
- for (j = 0; j < readQNodes[i].numSuccedents; j++)
- readQNodes[i].propList[0] = NULL;
- pda = pda->next;
- }
- }
- /* initialize nodes which write new data (Wnd) */
- pda = asmap->physInfo;
- for (i = 0; i < numDataNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
- writeDataNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new
- * data to be written */
- writeDataNodes[i].params[2].v = parityStripeID;
- writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-
- if (lu_flag) {
- /* initialize node to unlock the disk queue */
- rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList);
- unlockDataNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru);
- }
- pda = pda->next;
- }
-
-
- /* initialize nodes which compute new parity and Q */
- /* we use the simple XOR func in the double-XOR case, and when we're
- * accessing only a portion of one stripe unit. the distinction
- * between the two is that the regular XOR func assumes that the
- * targbuf is a full SU in size, and examines the pda associated with
- * the buffer to decide where within the buffer to XOR the data,
- * whereas the simple XOR func just XORs the data into the start of
- * the buffer. */
- if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) {
- func = pfuncs->simple;
- undoFunc = rf_NullNodeUndoFunc;
- name = pfuncs->SimpleName;
- if (qfuncs) {
- qfunc = qfuncs->simple;
- qname = qfuncs->SimpleName;
- }
- } else {
- func = pfuncs->regular;
- undoFunc = rf_NullNodeUndoFunc;
- name = pfuncs->RegularName;
- if (qfuncs) {
- qfunc = qfuncs->regular;
- qname = qfuncs->RegularName;
- }
- }
- /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop}
- * nodes, and raidPtr */
- if (numParityNodes == 2) { /* double-xor case */
- for (i = 0; i < numParityNodes; i++) {
- rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for
- * xor */
- xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
- xorNodes[i].params[0] = readDataNodes[i].params[0];
- xorNodes[i].params[1] = readDataNodes[i].params[1];
- xorNodes[i].params[2] = readParityNodes[i].params[0];
- xorNodes[i].params[3] = readParityNodes[i].params[1];
- xorNodes[i].params[4] = writeDataNodes[i].params[0];
- xorNodes[i].params[5] = writeDataNodes[i].params[1];
- xorNodes[i].params[6].p = raidPtr;
- xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as
- * target buf */
- if (nfaults == 2) {
- rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, qname, allocList); /* no wakeup func for
- * xor */
- qNodes[i].params[0] = readDataNodes[i].params[0];
- qNodes[i].params[1] = readDataNodes[i].params[1];
- qNodes[i].params[2] = readQNodes[i].params[0];
- qNodes[i].params[3] = readQNodes[i].params[1];
- qNodes[i].params[4] = writeDataNodes[i].params[0];
- qNodes[i].params[5] = writeDataNodes[i].params[1];
- qNodes[i].params[6].p = raidPtr;
- qNodes[i].results[0] = readQNodes[i].params[1].p; /* use old Q buf as
- * target buf */
- }
- }
- } else {
- /* there is only one xor node in this case */
- rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList);
- xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
- for (i = 0; i < numDataNodes + 1; i++) {
- /* set up params related to Rod and Rop nodes */
- xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */
- xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */
- }
- for (i = 0; i < numDataNodes; i++) {
- /* set up params related to Wnd and Wnp nodes */
- xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */
- xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */
- }
- xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get
- * at RAID information */
- xorNodes[0].results[0] = readParityNodes[0].params[1].p;
- if (nfaults == 2) {
- rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, qname, allocList);
- for (i = 0; i < numDataNodes; i++) {
- /* set up params related to Rod */
- qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */
- qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */
- }
- /* and read old q */
- qNodes[0].params[2 * numDataNodes + 0] = readQNodes[0].params[0]; /* pda */
- qNodes[0].params[2 * numDataNodes + 1] = readQNodes[0].params[1]; /* buffer pointer */
- for (i = 0; i < numDataNodes; i++) {
- /* set up params related to Wnd nodes */
- qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */
- qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */
- }
- qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get
- * at RAID information */
- qNodes[0].results[0] = readQNodes[0].params[1].p;
- }
- }
-
- /* initialize nodes which write new parity (Wnp) */
- pda = asmap->parityInfo;
- for (i = 0; i < numParityNodes; i++) {
- rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnp", allocList);
- RF_ASSERT(pda != NULL);
- writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr)
- * filled in by xor node */
- writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for
- * parity write
- * operation */
- writeParityNodes[i].params[2].v = parityStripeID;
- writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-
- if (lu_flag) {
- /* initialize node to unlock the disk queue */
- rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unp", allocList);
- unlockParityNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru);
- }
- pda = pda->next;
- }
-
- /* initialize nodes which write new Q (Wnq) */
- if (nfaults == 2) {
- pda = asmap->qInfo;
- for (i = 0; i < numParityNodes; i++) {
- rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnq", allocList);
- RF_ASSERT(pda != NULL);
- writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr)
- * filled in by xor node */
- writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for
- * parity write
- * operation */
- writeQNodes[i].params[2].v = parityStripeID;
- writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-
- if (lu_flag) {
- /* initialize node to unlock the disk queue */
- rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unq", allocList);
- unlockQNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru);
- }
- pda = pda->next;
- }
- }
- /* Step 4. connect the nodes */
-
- /* connect header to block node */
- dag_h->succedents[0] = blockNode;
-
- /* connect block node to read old data nodes */
- RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
- for (i = 0; i < numDataNodes; i++) {
- blockNode->succedents[i] = &readDataNodes[i];
- RF_ASSERT(readDataNodes[i].numAntecedents == 1);
- readDataNodes[i].antecedents[0] = blockNode;
- readDataNodes[i].antType[0] = rf_control;
- }
-
- /* connect block node to read old parity nodes */
- for (i = 0; i < numParityNodes; i++) {
- blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
- RF_ASSERT(readParityNodes[i].numAntecedents == 1);
- readParityNodes[i].antecedents[0] = blockNode;
- readParityNodes[i].antType[0] = rf_control;
- }
-
- /* connect block node to read old Q nodes */
- if (nfaults == 2)
- for (i = 0; i < numParityNodes; i++) {
- blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i];
- RF_ASSERT(readQNodes[i].numAntecedents == 1);
- readQNodes[i].antecedents[0] = blockNode;
- readQNodes[i].antType[0] = rf_control;
- }
-
- /* connect read old data nodes to write new data nodes */
- for (i = 0; i < numDataNodes; i++) {
- RF_ASSERT(readDataNodes[i].numSuccedents == ((nfaults * numParityNodes) + 1));
- RF_ASSERT(writeDataNodes[i].numAntecedents == 1);
- readDataNodes[i].succedents[0] = &writeDataNodes[i];
- writeDataNodes[i].antecedents[0] = &readDataNodes[i];
- writeDataNodes[i].antType[0] = rf_antiData;
- }
-
- /* connect read old data nodes to xor nodes */
- for (i = 0; i < numDataNodes; i++) {
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes);
- readDataNodes[i].succedents[1 + j] = &xorNodes[j];
- xorNodes[j].antecedents[i] = &readDataNodes[i];
- xorNodes[j].antType[i] = rf_trueData;
- }
- }
-
- /* connect read old data nodes to q nodes */
- if (nfaults == 2)
- for (i = 0; i < numDataNodes; i++)
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes);
- readDataNodes[i].succedents[1 + numParityNodes + j] = &qNodes[j];
- qNodes[j].antecedents[i] = &readDataNodes[i];
- qNodes[j].antType[i] = rf_trueData;
- }
-
- /* connect read old parity nodes to xor nodes */
- for (i = 0; i < numParityNodes; i++) {
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes);
- readParityNodes[i].succedents[j] = &xorNodes[j];
- xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
- xorNodes[j].antType[numDataNodes + i] = rf_trueData;
- }
- }
-
- /* connect read old q nodes to q nodes */
- if (nfaults == 2)
- for (i = 0; i < numParityNodes; i++) {
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(readQNodes[i].numSuccedents == numParityNodes);
- readQNodes[i].succedents[j] = &qNodes[j];
- qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i];
- qNodes[j].antType[numDataNodes + i] = rf_trueData;
- }
- }
-
- /* connect xor nodes to the write new parity nodes */
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(writeParityNodes[i].numAntecedents == numParityNodes);
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(xorNodes[j].numSuccedents == numParityNodes);
- xorNodes[i].succedents[j] = &writeParityNodes[j];
- writeParityNodes[j].antecedents[i] = &xorNodes[i];
- writeParityNodes[j].antType[i] = rf_trueData;
- }
- }
-
- /* connect q nodes to the write new q nodes */
- if (nfaults == 2)
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(writeQNodes[i].numAntecedents == numParityNodes);
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(qNodes[j].numSuccedents == 1);
- qNodes[i].succedents[j] = &writeQNodes[j];
- writeQNodes[j].antecedents[i] = &qNodes[i];
- writeQNodes[j].antType[i] = rf_trueData;
- }
- }
-
- RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
- RF_ASSERT(termNode->numSuccedents == 0);
- for (i = 0; i < numDataNodes; i++) {
- if (lu_flag) {
- /* connect write new data nodes to unlock nodes */
- RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
- RF_ASSERT(unlockDataNodes[i].numAntecedents == 1);
- writeDataNodes[i].succedents[0] = &unlockDataNodes[i];
- unlockDataNodes[i].antecedents[0] = &writeDataNodes[i];
- unlockDataNodes[i].antType[0] = rf_control;
-
- /* connect unlock nodes to term node */
- RF_ASSERT(unlockDataNodes[i].numSuccedents == 1);
- unlockDataNodes[i].succedents[0] = termNode;
- termNode->antecedents[i] = &unlockDataNodes[i];
- termNode->antType[i] = rf_control;
- } else {
- /* connect write new data nodes to term node */
- RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
- writeDataNodes[i].succedents[0] = termNode;
- termNode->antecedents[i] = &writeDataNodes[i];
- termNode->antType[i] = rf_control;
- }
- }
-
- for (i = 0; i < numParityNodes; i++) {
- if (lu_flag) {
- /* connect write new parity nodes to unlock nodes */
- RF_ASSERT(writeParityNodes[i].numSuccedents == 1);
- RF_ASSERT(unlockParityNodes[i].numAntecedents == 1);
- writeParityNodes[i].succedents[0] = &unlockParityNodes[i];
- unlockParityNodes[i].antecedents[0] = &writeParityNodes[i];
- unlockParityNodes[i].antType[0] = rf_control;
-
- /* connect unlock nodes to term node */
- RF_ASSERT(unlockParityNodes[i].numSuccedents == 1);
- unlockParityNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i];
- termNode->antType[numDataNodes + i] = rf_control;
- } else {
- RF_ASSERT(writeParityNodes[i].numSuccedents == 1);
- writeParityNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + i] = &writeParityNodes[i];
- termNode->antType[numDataNodes + i] = rf_control;
- }
- }
-
- if (nfaults == 2)
- for (i = 0; i < numParityNodes; i++) {
- if (lu_flag) {
- /* connect write new Q nodes to unlock nodes */
- RF_ASSERT(writeQNodes[i].numSuccedents == 1);
- RF_ASSERT(unlockQNodes[i].numAntecedents == 1);
- writeQNodes[i].succedents[0] = &unlockQNodes[i];
- unlockQNodes[i].antecedents[0] = &writeQNodes[i];
- unlockQNodes[i].antType[0] = rf_control;
-
- /* connect unlock nodes to unblock node */
- RF_ASSERT(unlockQNodes[i].numSuccedents == 1);
- unlockQNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i];
- termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
- } else {
- RF_ASSERT(writeQNodes[i].numSuccedents == 1);
- writeQNodes[i].succedents[0] = termNode;
- termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i];
- termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
- }
- }
-}
-
-
-
-/******************************************************************************
- * create a write graph (fault-free or degraded) for RAID level 1
- *
- * Hdr Nil -> Wpd -> Nil -> Trm
- * Nil -> Wsd ->
- * (the first Nil is the block node, the second Nil is the unblock node)
- * The "Wpd" node writes data to the primary copy in the mirror pair
- * The "Wsd" node writes data to the secondary copy in the mirror pair
- *
- * Parameters: raidPtr - description of the physical array
- * asmap - logical & physical addresses for this access
- * bp - buffer ptr (holds write data); not referenced here, the write buffers are taken from each pda->bufPtr
- * flags - general flags (e.g. disk locking); not referenced in this routine
- * allocList - list of memory allocated in DAG creation; dag_h (not listed above) - DAG header the new graph is hung from
- *****************************************************************************/
-
-void
-rf_CreateRaidOneWriteDAGFwd(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList)
-{
- RF_DagNode_t *blockNode, *unblockNode, *termNode;
- RF_DagNode_t *nodes, *wndNode, *wmirNode;
- int nWndNodes, nWmirNodes, i;
- RF_ReconUnitNum_t which_ru;
- RF_PhysDiskAddr_t *pda, *pdaP;
- RF_StripeNum_t parityStripeID;
-
- parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
- asmap->raidAddress, &which_ru);
- if (rf_dagDebug) {
- printf("[Creating RAID level 1 write DAG]\n");
- }
- nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; /* 2 implies access not
- * SU aligned */
- nWndNodes = (asmap->physInfo->next) ? 2 : 1;
-
- /* a failed data or mirror unit gets one write node fewer; then alloc the Wnd nodes and the Wmir node */
- if (asmap->numDataFailed == 1)
- nWndNodes--;
- if (asmap->numParityFailed == 1)
- nWmirNodes--;
-
- /* total number of nodes = nWndNodes + nWmirNodes + (block + unblock +
- * terminator) */
- RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
- i = 0; /* carve the single allocation into: Wnd nodes, Wmir nodes, block, unblock, term */
- wndNode = &nodes[i];
- i += nWndNodes;
- wmirNode = &nodes[i];
- i += nWmirNodes;
- blockNode = &nodes[i];
- i += 1;
- unblockNode = &nodes[i];
- i += 1;
- termNode = &nodes[i];
- i += 1;
- RF_ASSERT(i == (nWndNodes + nWmirNodes + 3));
-
- /* this dag can commit immediately */
- dag_h->numCommitNodes = 0;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- /* initialize the block, unblock and term nodes */
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-
- /* initialize the wnd nodes ("Wpd"): one disk write per pda on the asmap->physInfo list */
- if (nWndNodes > 0) {
- pda = asmap->physInfo;
- for (i = 0; i < nWndNodes; i++) {
- rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList);
- RF_ASSERT(pda != NULL);
- wndNode[i].params[0].p = pda;
- wndNode[i].params[1].p = pda->bufPtr;
- wndNode[i].params[2].v = parityStripeID;
- wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- pda = pda->next;
- }
- RF_ASSERT(pda == NULL); /* NOTE(review): expects the loop to consume the whole physInfo list; confirm this still holds after the nWndNodes-- above */
- }
- /* initialize the mirror nodes ("Wsd"): target is the parityInfo pda, data buffer comes from the matching physInfo pda */
- if (nWmirNodes > 0) {
- pda = asmap->physInfo;
- pdaP = asmap->parityInfo;
- for (i = 0; i < nWmirNodes; i++) {
- rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList);
- RF_ASSERT(pda != NULL);
- wmirNode[i].params[0].p = pdaP;
- wmirNode[i].params[1].p = pda->bufPtr;
- wmirNode[i].params[2].v = parityStripeID;
- wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- pda = pda->next;
- pdaP = pdaP->next;
- }
- RF_ASSERT(pda == NULL);
- RF_ASSERT(pdaP == NULL);
- }
- /* link the header node to the block node */
- RF_ASSERT(dag_h->numSuccedents == 1);
- RF_ASSERT(blockNode->numAntecedents == 0);
- dag_h->succedents[0] = blockNode;
-
- /* link the block node to the write nodes (Wnd nodes first, then Wmir nodes at offset nWndNodes) */
- RF_ASSERT(blockNode->numSuccedents == (nWndNodes + nWmirNodes));
- for (i = 0; i < nWndNodes; i++) {
- RF_ASSERT(wndNode[i].numAntecedents == 1);
- blockNode->succedents[i] = &wndNode[i];
- wndNode[i].antecedents[0] = blockNode;
- wndNode[i].antType[0] = rf_control;
- }
- for (i = 0; i < nWmirNodes; i++) {
- RF_ASSERT(wmirNode[i].numAntecedents == 1);
- blockNode->succedents[i + nWndNodes] = &wmirNode[i];
- wmirNode[i].antecedents[0] = blockNode;
- wmirNode[i].antType[0] = rf_control;
- }
-
- /* link the write nodes to the unblock node (same ordering as on the block node) */
- RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
- for (i = 0; i < nWndNodes; i++) {
- RF_ASSERT(wndNode[i].numSuccedents == 1);
- wndNode[i].succedents[0] = unblockNode;
- unblockNode->antecedents[i] = &wndNode[i];
- unblockNode->antType[i] = rf_control;
- }
- for (i = 0; i < nWmirNodes; i++) {
- RF_ASSERT(wmirNode[i].numSuccedents == 1);
- wmirNode[i].succedents[0] = unblockNode;
- unblockNode->antecedents[i + nWndNodes] = &wmirNode[i];
- unblockNode->antType[i + nWndNodes] = rf_control;
- }
-
- /* link the unblock node to the term node */
- RF_ASSERT(unblockNode->numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == 1);
- RF_ASSERT(termNode->numSuccedents == 0);
- unblockNode->succedents[0] = termNode;
- termNode->antecedents[0] = unblockNode;
- termNode->antType[0] = rf_control;
-
- return;
-}
diff --git a/sys/dev/raidframe/rf_dagffwr.h b/sys/dev/raidframe/rf_dagffwr.h
deleted file mode 100644
index f65875e..0000000
--- a/sys/dev/raidframe/rf_dagffwr.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dagffwr.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_DAGFFWR_H_
-#define _RF__RF_DAGFFWR_H_
-
-#include <dev/raidframe/rf_types.h>
-
-/* fault-free write DAG creation routines */
-void
-rf_CreateNonRedundantWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- RF_IoType_t type);
-void
-rf_CreateRAID0WriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList, RF_IoType_t type);
-void
-rf_CreateSmallWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList);
-void
-rf_CreateLargeWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList);
-void
-rf_CommonCreateLargeWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults,
- int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle);
- void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults,
- int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle);
- void rf_CommonCreateSmallWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs);
- void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs);
- void rf_CreateRaidOneWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList);
- void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp,
- RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList);
-
-#endif /* !_RF__RF_DAGFFWR_H_ */
diff --git a/sys/dev/raidframe/rf_dagflags.h b/sys/dev/raidframe/rf_dagflags.h
deleted file mode 100644
index b0777bd..0000000
--- a/sys/dev/raidframe/rf_dagflags.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dagflags.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/**************************************************************************************
- *
- * dagflags.h -- flags that can be given to DoAccess
- * I pulled these out of dag.h because routines that call DoAccess may need these flags,
- * but certainly do not need the declarations related to the DAG data structures.
- *
- **************************************************************************************/
-
-
-#ifndef _RF__RF_DAGFLAGS_H_
-#define _RF__RF_DAGFLAGS_H_
-
-/*
- * Bitmasks for the "flags" parameter (RF_RaidAccessFlags_t) used
- * by DoAccess, SelectAlgorithm, and the DAG creation routines.
- *
- * If USE_DAG or USE_ASM is specified, neither the DAG nor the ASM
- * will be modified, which means that you can't SUPPRESS if you
- * specify USE_DAG. (NOTE(review): no USE_DAG/USE_ASM flag is defined in the list below; confirm which flags this paragraph refers to.)
- */
-
-#define RF_DAG_FLAGS_NONE 0 /* no flags */
-#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* suppress all stripe locks in
- * the DAG */
-#define RF_DAG_RETURN_ASM (1<<1) /* create an ASM and return it
- * instead of freeing it */
-#define RF_DAG_RETURN_DAG (1<<2) /* create a DAG and return it
- * instead of freeing it */
-#define RF_DAG_NONBLOCKING_IO (1<<3) /* cause DoAccess to be
- * non-blocking */
-#define RF_DAG_ACCESS_COMPLETE (1<<4) /* the access is complete */
-#define RF_DAG_DISPATCH_RETURNED (1<<5) /* used to handle the case
- * where the dag invokes no
- * I/O */
-#define RF_DAG_TEST_ACCESS (1<<6) /* this access came through
- * rf_ioctl instead of
- * rf_strategy */
-
-#endif /* !_RF__RF_DAGFLAGS_H_ */
diff --git a/sys/dev/raidframe/rf_dagfuncs.c b/sys/dev/raidframe/rf_dagfuncs.c
deleted file mode 100644
index a1ba8150..0000000
--- a/sys/dev/raidframe/rf_dagfuncs.c
+++ /dev/null
@@ -1,906 +0,0 @@
-/* $NetBSD: rf_dagfuncs.c,v 1.7 2001/02/03 12:51:10 mrg Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * dagfuncs.c -- DAG node execution routines
- *
- * Rules:
- * 1. Every DAG execution function must eventually cause node->status to
- * get set to "good" or "bad", and "FinishNode" to be called. In the
- * case of nodes that complete immediately (xor, NullNodeFunc, etc),
- * the node execution function can do these two things directly. In
- * the case of nodes that have to wait for some event (a disk read to
- * complete, a lock to be released, etc) to occur before they can
- * complete, this is typically achieved by having whatever module
- * is doing the operation call GenericWakeupFunc upon completion.
- * 2. DAG execution functions should check the status in the DAG header
- * and NOP out their operations if the status is not "enable". However,
- * execution functions that release resources must be sure to release
- * them even when they NOP out the function that would use them.
- * Functions that acquire resources should go ahead and acquire them
- * even when they NOP, so that a downstream release node will not have
- * to check to find out whether or not the acquire was suppressed.
- */
-
-#include <sys/param.h>
-#if defined(__NetBSD__)
-#include <sys/ioctl.h>
-#elif defined(__FreeBSD__)
-#include <sys/ioccom.h>
-#include <sys/filio.h>
-#endif
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_dagutils.h>
-
-#include <dev/raidframe/rf_kintf.h>
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-#include <dev/raidframe/rf_paritylog.h>
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
-
-int (*rf_DiskReadFunc) (RF_DagNode_t *);
-int (*rf_DiskWriteFunc) (RF_DagNode_t *);
-int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
-int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
-int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
-int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
-int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
-int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
-int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
-
-/*****************************************************************************************
- * main (only) configuration routine for this module: binds the disk and xor-undo function pointers declared above to their implementations; always returns 0
- ****************************************************************************************/
-int
-rf_ConfigureDAGFuncs(listp)
- RF_ShutdownList_t **listp; /* not referenced in this routine */
-{
- RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); /* RF_LONGSHIFT must equal log2(sizeof(long)) */
- rf_DiskReadFunc = rf_DiskReadFuncForThreads;
- rf_DiskReadUndoFunc = rf_DiskUndoFunc; /* reads and writes share one undo routine */
- rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
- rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
- rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
- rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc; /* unlock and all three xor undos map to the null undo */
- rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
- rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
- rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
- return (0);
-}
-
-
-
-/*****************************************************************************************
- * the execution function associated with a terminate node: marks the node good and returns the result of rf_FinishNode()
- ****************************************************************************************/
-int
-rf_TerminateFunc(node)
- RF_DagNode_t *node;
-{
- RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); /* the header's commit count must match its number of commit nodes by now */
- node->status = rf_good;
- return (rf_FinishNode(node, RF_THREAD_CONTEXT));
-}
-
-int
-rf_TerminateUndoFunc(node) /* undo counterpart of rf_TerminateFunc */
- RF_DagNode_t *node;
-{
- return (0); /* does nothing: the node is neither modified nor passed to rf_FinishNode */
-}
-
-
-/*****************************************************************************************
- * execution functions associated with a mirror node
- *
- * parameters:
- *
- * 0 - physical disk address of data
- * 1 - buffer for holding read data
- * 2 - parity stripe ID
- * 3 - flags
- * 4 - physical disk address of mirror (parity)
- *
- ****************************************************************************************/
-
-int
-rf_DiskReadMirrorIdleFunc(node)
- RF_DagNode_t *node;
-{
- /* select the mirror copy with the shortest queue and fill in node
- * parameters with physical disk address (done inside rf_SelectMirrorDiskIdle, defined elsewhere -- policy not visible here) */
-
- rf_SelectMirrorDiskIdle(node);
- return (rf_DiskReadFunc(node)); /* then issue the read through the configured disk-read function pointer */
-}
-
-int
-rf_DiskReadMirrorPartitionFunc(node)
- RF_DagNode_t *node;
-{
- /* select the mirror copy with the shortest queue and fill in node
- * parameters with physical disk address (NOTE(review): wording is identical to the Idle variant; the policy is whatever rf_SelectMirrorDiskPartition implements -- confirm) */
-
- rf_SelectMirrorDiskPartition(node);
- return (rf_DiskReadFunc(node)); /* then issue the read through the configured disk-read function pointer */
-}
-
-int
-rf_DiskReadMirrorUndoFunc(node) /* undo counterpart of the two mirror-read functions */
- RF_DagNode_t *node;
-{
- return (0); /* does nothing: the node is neither modified nor passed to rf_FinishNode */
-}
-
-
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-/*****************************************************************************************
- * the execution function associated with a parity log update node (variant built when RF_INCLUDE_PARITYLOGGING > 0); always returns 0
- ****************************************************************************************/
-int
-rf_ParityLogUpdateFunc(node)
- RF_DagNode_t *node;
-{
- RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; /* param 0: physical disk address */
- caddr_t buf = (caddr_t) node->params[1].p; /* param 1: data buffer */
- RF_ParityLogData_t *logData;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
-
- if (node->dagHdr->status == rf_enable) { /* NOTE(review): when the header is not enabled nothing is called on the node here; check against rule 1 in the file header */
- RF_ETIMER_START(timer);
- logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
- (RF_Raid_t *) (node->dagHdr->raidPtr),
- node->wakeFunc, (void *) node,
- node->dagHdr->tracerec, timer);
- if (logData)
- rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
- else { /* no log data could be created: charge the elapsed time to plog_us and wake the node with ENOMEM */
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->plog_us += RF_ETIMER_VAL_US(timer);
- (node->wakeFunc) (node, ENOMEM);
- }
- }
- return (0);
-}
-
-
-/*****************************************************************************************
- * the execution function associated with a parity log overwrite node (variant built when RF_INCLUDE_PARITYLOGGING > 0); always returns 0
- ****************************************************************************************/
-int
-rf_ParityLogOverwriteFunc(node)
- RF_DagNode_t *node;
-{
- RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; /* param 0: physical disk address */
- caddr_t buf = (caddr_t) node->params[1].p; /* param 1: data buffer */
- RF_ParityLogData_t *logData;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
-
- if (node->dagHdr->status == rf_enable) { /* NOTE(review): when the header is not enabled nothing is called on the node here; check against rule 1 in the file header */
- RF_ETIMER_START(timer);
- logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr),
- node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
- if (logData)
- rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
- else { /* no log data could be created: charge the elapsed time to plog_us and wake the node with ENOMEM */
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->plog_us += RF_ETIMER_VAL_US(timer);
- (node->wakeFunc) (node, ENOMEM);
- }
- }
- return (0);
-}
-#else /* RF_INCLUDE_PARITYLOGGING > 0 */
-
-int
-rf_ParityLogUpdateFunc(node) /* stub used when RF_INCLUDE_PARITYLOGGING is not > 0 */
- RF_DagNode_t *node;
-{
- return (0); /* does nothing: the node is neither modified nor woken */
-}
-int
-rf_ParityLogOverwriteFunc(node) /* stub used when RF_INCLUDE_PARITYLOGGING is not > 0 */
- RF_DagNode_t *node;
-{
- return (0); /* does nothing: the node is neither modified nor woken */
-}
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
-
-int
-rf_ParityLogUpdateUndoFunc(node) /* undo counterpart of rf_ParityLogUpdateFunc; compiled regardless of RF_INCLUDE_PARITYLOGGING */
- RF_DagNode_t *node;
-{
- return (0); /* does nothing: the node is left untouched */
-}
-
-int
-rf_ParityLogOverwriteUndoFunc(node) /* undo counterpart of rf_ParityLogOverwriteFunc; compiled regardless of RF_INCLUDE_PARITYLOGGING */
- RF_DagNode_t *node;
-{
- return (0); /* does nothing: the node is left untouched */
-}
-/*****************************************************************************************
- * the execution function associated with a NOP node: marks the node good and returns the result of rf_FinishNode()
- ****************************************************************************************/
-int
-rf_NullNodeFunc(node)
- RF_DagNode_t *node;
-{
- node->status = rf_good; /* nothing to execute, so the node completes immediately */
- return (rf_FinishNode(node, RF_THREAD_CONTEXT));
-}
-
-int
-rf_NullNodeUndoFunc(node) /* undo function for nodes with nothing to roll back */
- RF_DagNode_t *node;
-{
- node->status = rf_undone; /* record the node as undone without doing any other work */
- return (rf_FinishNode(node, RF_THREAD_CONTEXT));
-}
-
-
-/*****************************************************************************************
- * the execution function associated with a disk-read node: builds a disk queue request from node->params[0..3] and enqueues it on the queue at dqs[pda->row][pda->col]; always returns 0 (a failed request allocation is reported through node->wakeFunc with ENOMEM)
- ****************************************************************************************/
-int
-rf_DiskReadFuncForThreads(node)
- RF_DagNode_t *node;
-{
- RF_DiskQueueData_t *req;
- RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; /* param 0: physical disk address */
- caddr_t buf = (caddr_t) node->params[1].p; /* param 1: buffer passed to the request */
- RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; /* param 2: parity stripe ID */
- unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); /* param 3 packs priority, lock flag, unlock flag and recon unit */
- unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
- unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
- unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
- RF_DiskQueueDataFlags_t flags = 0;
- RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP; /* request becomes a NOP when the DAG header is not enabled */
- RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
- void *b_proc = NULL; /* stays NULL unless built for NetBSD with a non-NULL dagHdr->bp */
-
-#if defined(__NetBSD__)
- if (node->dagHdr->bp)
- b_proc = (void *) ((RF_Buf_t) node->dagHdr->bp)->b_proc;
-#endif
-
- RF_ASSERT(!(lock && unlock)); /* a single request may lock or unlock the queue, never both */
- flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
- flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
-
- req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
- buf, parityStripeID, which_ru,
- (int (*) (void *, int)) node->wakeFunc,
- node, NULL, node->dagHdr->tracerec,
- (void *) (node->dagHdr->raidPtr), flags, b_proc);
- if (!req) {
- (node->wakeFunc) (node, ENOMEM); /* no request object: wake the node with ENOMEM instead of queueing */
- } else {
- node->dagFuncData = (void *) req; /* keep the request reachable from the node */
- rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
- }
- return (0);
-}
-
-
-/*****************************************************************************************
- * the execution function associated with a disk-write node: builds a disk queue request from node->params[0..3] and enqueues it on the queue at dqs[pda->row][pda->col]; always returns 0 (a failed request allocation is reported through node->wakeFunc with ENOMEM)
- ****************************************************************************************/
-int
-rf_DiskWriteFuncForThreads(node)
- RF_DagNode_t *node;
-{
- RF_DiskQueueData_t *req;
- RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; /* param 0: physical disk address */
- caddr_t buf = (caddr_t) node->params[1].p; /* param 1: buffer passed to the request */
- RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; /* param 2: parity stripe ID */
- unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); /* param 3 packs priority, lock flag, unlock flag and recon unit */
- unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
- unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
- unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
- RF_DiskQueueDataFlags_t flags = 0;
- RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; /* request becomes a NOP when the DAG header is not enabled */
- RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
- void *b_proc = NULL; /* stays NULL unless built for NetBSD with a non-NULL dagHdr->bp */
-
-#if defined(__NetBSD__)
- if (node->dagHdr->bp)
- b_proc = (void *) ((RF_Buf_t) node->dagHdr->bp)->b_proc;
-#endif
-
- /* normal processing (rollaway or forward recovery) begins here */
- RF_ASSERT(!(lock && unlock)); /* a single request may lock or unlock the queue, never both */
- flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
- flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
- req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
- buf, parityStripeID, which_ru,
- (int (*) (void *, int)) node->wakeFunc,
- (void *) node, NULL,
- node->dagHdr->tracerec,
- (void *) (node->dagHdr->raidPtr),
- flags, b_proc);
-
- if (!req) {
- (node->wakeFunc) (node, ENOMEM); /* no request object: wake the node with ENOMEM instead of queueing */
- } else {
- node->dagFuncData = (void *) req; /* keep the request reachable from the node */
- rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
- }
-
- return (0);
-}
-/*****************************************************************************************
- * the undo function for disk nodes
- * Note: this is not a proper undo of a write node, only locks are released.
- * old data is not restored to disk!
- ****************************************************************************************/
-int
-rf_DiskUndoFunc(RF_DagNode_t *node)
-{
-	/*
-	 * Queues a no-op request flagged RF_UNLOCK_DISK_QUEUE on the disk
-	 * named by params[0]; no data is transferred.  Always returns 0.
-	 */
-	RF_PhysDiskAddr_t *addr = (RF_PhysDiskAddr_t *) node->params[0].p;
-	RF_DiskQueue_t **queues = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
-	RF_DiskQueueData_t *nop;
-
-	nop = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
-	    0L, 0, NULL, 0L, 0,
-	    (int (*) (void *, int)) node->wakeFunc,
-	    (void *) node,
-	    NULL, node->dagHdr->tracerec,
-	    (void *) (node->dagHdr->raidPtr),
-	    RF_UNLOCK_DISK_QUEUE, NULL);
-	if (nop != NULL) {
-		node->dagFuncData = (void *) nop;
-		rf_DiskIOEnqueue(&(queues[addr->row][addr->col]), nop, RF_IO_NORMAL_PRIORITY);
-	} else {
-		/* could not build the request: complete the node with ENOMEM */
-		(node->wakeFunc) (node, ENOMEM);
-	}
-
-	return (0);
-}
-/*****************************************************************************************
- * the execution function associated with an "unlock disk queue" node
- ****************************************************************************************/
-int
-rf_DiskUnlockFuncForThreads(RF_DagNode_t *node)
-{
-	/*
-	 * Sends a no-op request carrying RF_UNLOCK_DISK_QUEUE to the queue of
-	 * the disk identified by params[0].  Always returns 0; allocation
-	 * failure is delivered to node->wakeFunc as ENOMEM.
-	 */
-	RF_DiskQueue_t **queues = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
-	RF_PhysDiskAddr_t *addr = (RF_PhysDiskAddr_t *) node->params[0].p;
-	RF_DiskQueueData_t *unlockReq;
-
-	unlockReq = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
-	    0L, 0, NULL, 0L, 0,
-	    (int (*) (void *, int)) node->wakeFunc,
-	    (void *) node,
-	    NULL, node->dagHdr->tracerec,
-	    (void *) (node->dagHdr->raidPtr),
-	    RF_UNLOCK_DISK_QUEUE, NULL);
-	if (unlockReq == NULL) {
-		(node->wakeFunc) (node, ENOMEM);
-		return (0);
-	}
-	node->dagFuncData = (void *) unlockReq;
-	rf_DiskIOEnqueue(&(queues[addr->row][addr->col]), unlockReq, RF_IO_NORMAL_PRIORITY);
-
-	return (0);
-}
-/*****************************************************************************************
- * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes,
- * the routine is called to set the node status and inform the execution engine that
- * the node has fired.
- ****************************************************************************************/
-int
-rf_GenericWakeupFunc(RF_DagNode_t *node, int status)
-{
-	/*
-	 * Translate the completion status into the node's next state:
-	 *   rf_bwd1    -> rf_bwd2, free the old request and reissue as a write
-	 *   rf_fired   -> rf_bad on error, rf_good otherwise
-	 *   rf_recover -> rf_panic on error, rf_undone otherwise
-	 * Any other state is reported and panics.
-	 */
-	switch (node->status) {
-	case rf_bwd1:
-		node->status = rf_bwd2;
-		if (node->dagFuncData != NULL)
-			rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
-		return (rf_DiskWriteFuncForThreads(node));
-	case rf_fired:
-		node->status = (status != 0) ? rf_bad : rf_good;
-		break;
-	case rf_recover:
-		/* probably should never reach this case */
-		node->status = (status != 0) ? rf_panic : rf_undone;
-		break;
-	default:
-		printf("rf_GenericWakeupFunc:");
-		printf("node->status is %d,", node->status);
-		printf("status is %d \n", status);
-		RF_PANIC();
-		break;
-	}
-
-	/* release the disk-queue request, if any, then tell the engine the node is done */
-	if (node->dagFuncData != NULL)
-		rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
-	return (rf_FinishNode(node, RF_INTR_CONTEXT));
-}
-
-
-/*****************************************************************************************
- * there are three distinct types of xor nodes
- * A "regular xor" is used in the fault-free case where the access spans a complete
- * stripe unit. It assumes that the result buffer is one full stripe unit in size,
- * and uses the stripe-unit-offset values that it computes from the PDAs to determine
- * where within the stripe unit to XOR each argument buffer.
- *
- * A "simple xor" is used in the fault-free case where the access touches only a portion
- * of one (or two, in some cases) stripe unit(s). It assumes that all the argument
- * buffers are of the same size and have the same stripe unit offset.
- *
- * A "recovery xor" is used in the degraded-mode case. It's similar to the regular
- * xor function except that it takes the failed PDA as an additional parameter, and
- * uses it to determine what portions of the argument buffers need to be xor'd into
- * the result buffer, and where in the result buffer they should go.
- ****************************************************************************************/
-
-/* xor the params together and store the result in the result field.
- * assume the result field points to a buffer that is the size of one SU,
- * and use the pda params to determine where within the buffer to XOR
- * the input buffers.
- */
-int
-rf_RegularXorFunc(node)
-	RF_DagNode_t *node;
-{
-	/*
-	 * Parameters come in (PDA, buffer) pairs; the final parameter is the
-	 * RF_Raid_t pointer.  Each buffer is XORed into results[0] at the
-	 * stripe-unit offset derived from its PDA.
-	 */
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-	int i, rc, retcode;
-
-	retcode = 0;
-	if (node->dagHdr->status == rf_enable) {
-		RF_ETIMER_START(timer);
-		for (i = 0; i < node->numParams - 1; i += 2) {
-			/* don't do the XOR if the input is the same as the output */
-			if (node->params[i + 1].p == node->results[0])
-				continue;
-			rc = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
-			    (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
-			/*
-			 * Keep the first failure.  Previously each iteration
-			 * overwrote retcode, so a later successful XOR hid an
-			 * earlier error and the node was reported as good.
-			 */
-			if (rc != 0 && retcode == 0)
-				retcode = rc;
-		}
-		RF_ETIMER_STOP(timer);
-		RF_ETIMER_EVAL(timer);
-		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
-	}
-	/* call wake func explicitly since no I/O in this node */
-	return (rf_GenericWakeupFunc(node, retcode));
-}
-/* xor the inputs into the result buffer, ignoring placement issues */
-int
-rf_SimpleXorFunc(node)
-	RF_DagNode_t *node;
-{
-	/*
-	 * Parameters come in (PDA, buffer) pairs; the final parameter is the
-	 * RF_Raid_t pointer.  Every buffer is XORed into the start of
-	 * results[0]; only the PDA's sector count is used (for the length).
-	 */
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	int i, rc, retcode = 0;
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-
-	if (node->dagHdr->status == rf_enable) {
-		RF_ETIMER_START(timer);
-		for (i = 0; i < node->numParams - 1; i += 2) {
-			/* don't do the XOR if the input is the same as the output */
-			if (node->params[i + 1].p == node->results[0])
-				continue;
-			rc = rf_bxor((char *) node->params[i + 1].p,
-			    (char *) node->results[0],
-			    rf_RaidAddressToByte(raidPtr,
-				((RF_PhysDiskAddr_t *) node->params[i].p)->
-				numSector), (RF_Buf_t) node->dagHdr->bp);
-			/*
-			 * Keep the first failure.  Previously each iteration
-			 * overwrote retcode, so a later successful XOR hid an
-			 * earlier error.
-			 */
-			if (rc != 0 && retcode == 0)
-				retcode = rc;
-		}
-		RF_ETIMER_STOP(timer);
-		RF_ETIMER_EVAL(timer);
-		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
-	}
-	/* call wake func explicitly since no I/O in this node */
-	return (rf_GenericWakeupFunc(node, retcode));
-}
-/* this xor is used by the degraded-mode dag functions to recover lost data.
- * the second-to-last parameter is the PDA for the failed portion of the access.
- * the code here looks at this PDA and assumes that the xor target buffer is
- * equal in size to the number of sectors in the failed PDA. It then uses
- * the other PDAs in the parameter list to determine where within the target
- * buffer the corresponding data should be xored.
- */
-int
-rf_RecoveryXorFunc(node)
-	RF_DagNode_t *node;
-{
-	/*
-	 * Parameter layout: (PDA, buffer) pairs, then the failed PDA at
-	 * params[numParams - 2], then the RF_Raid_t pointer at
-	 * params[numParams - 1].  Each source buffer lands in results[0] at
-	 * its stripe-unit offset relative to the failed PDA's offset.
-	 */
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
-	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
-	int i, rc, retcode = 0;
-	RF_PhysDiskAddr_t *pda;
-	int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
-	char *srcbuf, *destbuf;
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-
-	if (node->dagHdr->status == rf_enable) {
-		RF_ETIMER_START(timer);
-		for (i = 0; i < node->numParams - 2; i += 2) {
-			/* skip a source buffer that is also the destination */
-			if (node->params[i + 1].p == node->results[0])
-				continue;
-			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
-			srcbuf = (char *) node->params[i + 1].p;
-			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
-			rc = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp);
-			/*
-			 * Keep the first failure.  Previously each iteration
-			 * overwrote retcode, so a later successful XOR hid an
-			 * earlier error.
-			 */
-			if (rc != 0 && retcode == 0)
-				retcode = rc;
-		}
-		RF_ETIMER_STOP(timer);
-		RF_ETIMER_EVAL(timer);
-		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
-	}
-	return (rf_GenericWakeupFunc(node, retcode));
-}
-/*****************************************************************************************
- * The next three functions are utilities used by the above xor-execution functions.
- ****************************************************************************************/
-
-
-/*
- * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit
- * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the
- * access described by pda is one SU in size (which by implication means it's SU-aligned),
- * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one
- * SU in size the XOR occurs on only the portion of targbuf identified in the pda.
- */
-
-int
-rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf,
-    char *targbuf, void *bp)
-{
-	int suSectors = raidPtr->Layout.sectorsPerStripeUnit;
-	int firstSector = pda->startSector % suSectors;	/* offset within the SU */
-	char *window;
-	int nbytes;
-
-	/* the access may cover at most one stripe unit */
-	RF_ASSERT(pda->numSector <= suSectors);
-
-	/* XOR only the part of targbuf that the PDA describes */
-	window = targbuf + rf_RaidAddressToByte(raidPtr, firstSector);
-	nbytes = rf_RaidAddressToByte(raidPtr, pda->numSector);
-	return (rf_bxor(srcbuf, window, nbytes, bp));
-}
-/* it really should be the case that the buffer pointers (returned by malloc)
- * are aligned to the natural word size of the machine, so this is the only
- * case we optimize for. The length should always be a multiple of the sector
- * size, so there should be no problem with leftover bytes at the end.
- */
-int
-rf_bxor(src, dest, len, bp)
-	char *src;
-	char *dest;
-	int len;
-	void *bp;
-{
-	/*
-	 * XOR len bytes of src into dest.  Both pointers and the length must
-	 * be multiples of sizeof(long); the work is delegated to
-	 * rf_longword_bxor() with the length converted to longwords.
-	 *
-	 * Returns the result of rf_longword_bxor(), or EINVAL for an
-	 * unaligned request.  Previously an unaligned request hit
-	 * RF_ASSERT(0) and then returned 0, so if that assertion does not
-	 * stop execution the caller saw success although nothing had been
-	 * XORed.  The return code is now a plain int to match the function's
-	 * return type.
-	 */
-	unsigned long mask = sizeof(long) - 1;
-
-	if ((((unsigned long) src) & mask) || (((unsigned long) dest) & mask) || (len & mask)) {
-		RF_ASSERT(0);
-		return (EINVAL);
-	}
-	return (rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp));
-}
-/* map a user buffer into kernel space, if necessary */
-/*
- * In this build the macro is a plain assignment: (y) receives (x) unchanged
- * and _bp is not used.  The NULL checks that follow each REMAP_VA in the XOR
- * routines therefore only trigger when the source pointer itself is NULL.
- */
-#define REMAP_VA(_bp,x,y) (y) = (x)
-
-/* When XORing in kernel mode, we need to map each user page to kernel space before we can access it.
- * We don't want to assume anything about which input buffers are in kernel/user
- * space, nor about their alignment, so in each loop we compute the maximum number
- * of bytes that we can xor without crossing any page boundaries, and do only this many
- * bytes before the next remap.
- */
-/*
- * XOR len longwords of src into dest in place (dest[i] ^= src[i]).
- * src/dest track the caller's addresses; pg_src/pg_dest are the pointers
- * that are actually dereferenced and are refreshed through REMAP_VA.
- * Returns 0 on success, or EFAULT when a remapped pointer comes back NULL.
- */
-int
-rf_longword_bxor(src, dest, len, bp)
- unsigned long *src;
- unsigned long *dest;
- int len; /* longwords */
- void *bp;
-{
- unsigned long *end = src + len;
- unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */
- unsigned long *pg_src, *pg_dest; /* per-page source/dest
- * pointers */
- int longs_this_time;/* # longwords to xor in the current iteration */
-
- REMAP_VA(bp, src, pg_src);
- REMAP_VA(bp, dest, pg_dest);
- if (!pg_src || !pg_dest)
- return (EFAULT);
-
- while (len >= 4) {
- /* NOTE(review): RF_BLIP presumably yields the bytes left in the page; the shift converts that to longwords -- confirm */
- longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */
- /* advance the caller-side cursors up front; the pg_* pointers catch up in the loops below */
- src += longs_this_time;
- dest += longs_this_time;
- len -= longs_this_time;
- /* main body, unrolled four longwords at a time */
- while (longs_this_time >= 4) {
- d0 = pg_dest[0];
- d1 = pg_dest[1];
- d2 = pg_dest[2];
- d3 = pg_dest[3];
- s0 = pg_src[0];
- s1 = pg_src[1];
- s2 = pg_src[2];
- s3 = pg_src[3];
- pg_dest[0] = d0 ^ s0;
- pg_dest[1] = d1 ^ s1;
- pg_dest[2] = d2 ^ s2;
- pg_dest[3] = d3 ^ s3;
- pg_src += 4;
- pg_dest += 4;
- longs_this_time -= 4;
- }
- /* leftover (fewer than four) longwords of this chunk */
- while (longs_this_time > 0) { /* cannot cross any page
- * boundaries here */
- *pg_dest++ ^= *pg_src++;
- longs_this_time--;
- }
-
- /* either we're done, or we've reached a page boundary on one
- * (or possibly both) of the pointers */
- if (len) {
- if (RF_PAGE_ALIGNED(src))
- REMAP_VA(bp, src, pg_src);
- if (RF_PAGE_ALIGNED(dest))
- REMAP_VA(bp, dest, pg_dest);
- if (!pg_src || !pg_dest)
- return (EFAULT);
- }
- }
- /* tail: fewer than four longwords remain; note there is no NULL check after the remaps in this loop */
- while (src < end) {
- *pg_dest++ ^= *pg_src++;
- src++;
- dest++;
- len--;
- if (RF_PAGE_ALIGNED(src))
- REMAP_VA(bp, src, pg_src);
- if (RF_PAGE_ALIGNED(dest))
- REMAP_VA(bp, dest, pg_dest);
- }
- RF_ASSERT(len == 0);
- return (0);
-}
-
-
-/*
- dst = a ^ b ^ c;
- a may equal dst
- see comment above longword_bxor
-*/
-/*
- * Three-way XOR: dst[i] = a[i] ^ b[i] ^ c[i] for len longwords.
- * a may alias dst, in which case the destination pointer follows pg_a.
- * a/b/c/dst track the caller's addresses; the pg_* pointers are the ones
- * dereferenced and are refreshed through REMAP_VA at page boundaries.
- * Returns 0 on success, or EFAULT when a remapped pointer comes back NULL.
- *
- * Fixes relative to the previous version, all in the alignment loop:
- *  - pg_b and pg_c were remapped from `a' instead of `b' and `c', so after
- *    a page crossing they read the wrong operand;
- *  - the loop was not bounded by len, so a short buffer with an unaligned
- *    destination drove len negative and the tail loop never terminated
- *    correctly;
- *  - pg_dst was not re-synchronised with pg_a after a remap when dst == a,
- *    unlike the two later loops.
- */
-int
-rf_longword_bxor3(dst, a, b, c, len, bp)
-	unsigned long *dst;
-	unsigned long *a;
-	unsigned long *b;
-	unsigned long *c;
-	int len;		/* length in longwords */
-	void *bp;
-{
-	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-	unsigned long *pg_a, *pg_b, *pg_c, *pg_dst;	/* per-page source/dest
-							 * pointers */
-	int longs_this_time;	/* # longs to xor in the current iteration */
-	char dst_is_a = 0;
-
-	REMAP_VA(bp, a, pg_a);
-	REMAP_VA(bp, b, pg_b);
-	REMAP_VA(bp, c, pg_c);
-	if (a == dst) {
-		pg_dst = pg_a;
-		dst_is_a = 1;
-	} else {
-		REMAP_VA(bp, dst, pg_dst);
-	}
-
-	/* align dest to cache line.  Can't cross a pg boundary on dst here. */
-	while (len > 0 && (((unsigned long) pg_dst) & 0x1f)) {
-		*pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
-		dst++;
-		a++;
-		b++;
-		c++;
-		if (RF_PAGE_ALIGNED(a)) {
-			REMAP_VA(bp, a, pg_a);
-			if (!pg_a)
-				return (EFAULT);
-			if (dst_is_a)
-				pg_dst = pg_a;
-		}
-		if (RF_PAGE_ALIGNED(b)) {
-			REMAP_VA(bp, b, pg_b);
-			if (!pg_b)
-				return (EFAULT);
-		}
-		if (RF_PAGE_ALIGNED(c)) {
-			REMAP_VA(bp, c, pg_c);
-			if (!pg_c)
-				return (EFAULT);
-		}
-		len--;
-	}
-
-	while (len > 4) {
-		longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT);
-		/* advance the caller-side cursors up front; the pg_* pointers catch up below */
-		a += longs_this_time;
-		b += longs_this_time;
-		c += longs_this_time;
-		dst += longs_this_time;
-		len -= longs_this_time;
-		/* unrolled by four; statement order is hand-interleaved, keep it as is */
-		while (longs_this_time >= 4) {
-			a0 = pg_a[0];
-			longs_this_time -= 4;
-
-			a1 = pg_a[1];
-			a2 = pg_a[2];
-
-			a3 = pg_a[3];
-			pg_a += 4;
-
-			b0 = pg_b[0];
-			b1 = pg_b[1];
-
-			b2 = pg_b[2];
-			b3 = pg_b[3];
-			/* start dual issue */
-			a0 ^= b0;
-			b0 = pg_c[0];
-
-			pg_b += 4;
-			a1 ^= b1;
-
-			a2 ^= b2;
-			a3 ^= b3;
-
-			b1 = pg_c[1];
-			a0 ^= b0;
-
-			b2 = pg_c[2];
-			a1 ^= b1;
-
-			b3 = pg_c[3];
-			a2 ^= b2;
-
-			pg_dst[0] = a0;
-			a3 ^= b3;
-			pg_dst[1] = a1;
-			pg_c += 4;
-			pg_dst[2] = a2;
-			pg_dst[3] = a3;
-			pg_dst += 4;
-		}
-		while (longs_this_time > 0) {	/* cannot cross any page
-						 * boundaries here */
-			*pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
-			longs_this_time--;
-		}
-
-		/* more to do: refresh whichever pointers reached a page boundary */
-		if (len) {
-			if (RF_PAGE_ALIGNED(a)) {
-				REMAP_VA(bp, a, pg_a);
-				if (!pg_a)
-					return (EFAULT);
-				if (dst_is_a)
-					pg_dst = pg_a;
-			}
-			if (RF_PAGE_ALIGNED(b)) {
-				REMAP_VA(bp, b, pg_b);
-				if (!pg_b)
-					return (EFAULT);
-			}
-			if (RF_PAGE_ALIGNED(c)) {
-				REMAP_VA(bp, c, pg_c);
-				if (!pg_c)
-					return (EFAULT);
-			}
-			if (!dst_is_a)
-				if (RF_PAGE_ALIGNED(dst)) {
-					REMAP_VA(bp, dst, pg_dst);
-					if (!pg_dst)
-						return (EFAULT);
-				}
-		}
-	}
-	/* tail: at most four longwords remain, handled one at a time */
-	while (len) {
-		*pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
-		dst++;
-		a++;
-		b++;
-		c++;
-		if (RF_PAGE_ALIGNED(a)) {
-			REMAP_VA(bp, a, pg_a);
-			if (!pg_a)
-				return (EFAULT);
-			if (dst_is_a)
-				pg_dst = pg_a;
-		}
-		if (RF_PAGE_ALIGNED(b)) {
-			REMAP_VA(bp, b, pg_b);
-			if (!pg_b)
-				return (EFAULT);
-		}
-		if (RF_PAGE_ALIGNED(c)) {
-			REMAP_VA(bp, c, pg_c);
-			if (!pg_c)
-				return (EFAULT);
-		}
-		if (!dst_is_a)
-			if (RF_PAGE_ALIGNED(dst)) {
-				REMAP_VA(bp, dst, pg_dst);
-				if (!pg_dst)
-					return (EFAULT);
-			}
-		len--;
-	}
-	return (0);
-}
-
-/*
- * Byte-addressed front end for rf_longword_bxor3(): dst = a ^ b ^ c over
- * len bytes.  All four pointers and the length must have their low three
- * bits clear (asserted).
- */
-int
-rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
-    unsigned char *c, unsigned long len, void *bp)
-{
-	unsigned long lowBits = RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len;
-
-	RF_ASSERT((lowBits & 0x7) == 0);
-
-	/* hand over to the longword routine with the length in longwords */
-	return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
-		(unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp));
-}
diff --git a/sys/dev/raidframe/rf_dagfuncs.h b/sys/dev/raidframe/rf_dagfuncs.h
deleted file mode 100644
index da7e8b2..0000000
--- a/sys/dev/raidframe/rf_dagfuncs.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dagfuncs.h,v 1.4 2000/03/30 13:39:07 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, William V. Courtright II, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************************
- *
- * dagfuncs.h -- header file for DAG node execution routines
- *
- ****************************************************************************************/
-
-#ifndef _RF__RF_DAGFUNCS_H_
-#define _RF__RF_DAGFUNCS_H_
-
-int rf_ConfigureDAGFuncs(RF_ShutdownList_t ** listp);
-int rf_TerminateFunc(RF_DagNode_t * node);
-int rf_TerminateUndoFunc(RF_DagNode_t * node);
-int rf_DiskReadMirrorIdleFunc(RF_DagNode_t * node);
-int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t * node);
-int rf_DiskReadMirrorUndoFunc(RF_DagNode_t * node);
-int rf_ParityLogUpdateFunc(RF_DagNode_t * node);
-int rf_ParityLogOverwriteFunc(RF_DagNode_t * node);
-int rf_ParityLogUpdateUndoFunc(RF_DagNode_t * node);
-int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t * node);
-int rf_NullNodeFunc(RF_DagNode_t * node);
-int rf_NullNodeUndoFunc(RF_DagNode_t * node);
-int rf_DiskReadFuncForThreads(RF_DagNode_t * node);
-int rf_DiskWriteFuncForThreads(RF_DagNode_t * node);
-int rf_DiskUndoFunc(RF_DagNode_t * node);
-int rf_DiskUnlockFuncForThreads(RF_DagNode_t * node);
-int rf_GenericWakeupFunc(RF_DagNode_t * node, int status);
-int rf_RegularXorFunc(RF_DagNode_t * node);
-int rf_SimpleXorFunc(RF_DagNode_t * node);
-int rf_RecoveryXorFunc(RF_DagNode_t * node);
-int
-rf_XorIntoBuffer(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, char *srcbuf,
- char *targbuf, void *bp);
-int rf_bxor(char *src, char *dest, int len, void *bp);
-int
-rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp);
-int
-rf_longword_bxor3(unsigned long *dest, unsigned long *a, unsigned long *b,
- unsigned long *c, int len, void *bp);
-int
-rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
- unsigned char *c, unsigned long len, void *bp);
-
-/* function ptrs defined in ConfigureDAGFuncs() */
-extern int (*rf_DiskReadFunc) (RF_DagNode_t *);
-extern int (*rf_DiskWriteFunc) (RF_DagNode_t *);
-extern int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
-extern int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
-extern int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
-extern int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
-extern int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
-extern int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
-extern int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
-
-/*
- * macros for manipulating the param[3] in a read or write node
- *
- * Bit layout of the packed word, as implemented below:
- *   bits 0-3   priority
- *   bit  4     lock flag
- *   bit  5     unlock flag
- *   bits 8-31  which_ru value (24 bits)
- *
- * RF_CREATE_PARAM3 now parenthesises its `wru' argument, so an expression
- * such as (a | b) is masked as a whole, and widens the masked value to
- * RF_uint64 before shifting so the shift can no longer overflow a signed
- * int when bit 23 of wru is set.
- */
-#define RF_CREATE_PARAM3(pri, lk, unlk, wru) ((RF_uint64)((((RF_uint64)((wru)&0xFFFFFF))<<8)|((lk)?0x10:0)|((unlk)?0x20:0)|((pri)&0xF)))
-#define RF_EXTRACT_PRIORITY(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 0) & 0x0F)
-#define RF_EXTRACT_LOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 4) & 0x1)
-#define RF_EXTRACT_UNLOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1)
-#define RF_EXTRACT_RU(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF)
-
-#endif /* !_RF__RF_DAGFUNCS_H_ */
diff --git a/sys/dev/raidframe/rf_dagutils.c b/sys/dev/raidframe/rf_dagutils.c
deleted file mode 100644
index c961870..0000000
--- a/sys/dev/raidframe/rf_dagutils.c
+++ /dev/null
@@ -1,1299 +0,0 @@
-/* $NetBSD: rf_dagutils.c,v 1.6 1999/12/09 02:26:09 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Mark Holland, William V. Courtright II, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/******************************************************************************
- *
- * rf_dagutils.c -- utility routines for manipulating dags
- *
- *****************************************************************************/
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_)))
-
-/*
- * XOR function table for normal operation: pairs the regular and the simple
- * XOR node functions with their display names.
- * NOTE(review): field order is taken from the initializer; confirm against
- * the RF_RedFuncs_t declaration.
- */
-RF_RedFuncs_t rf_xorFuncs = {
- rf_RegularXorFunc, "Reg Xr",
-rf_SimpleXorFunc, "Simple Xr"};
-
-/* XOR function table for recovery: both slots use the recovery XOR function. */
-RF_RedFuncs_t rf_xorRecoveryFuncs = {
- rf_RecoveryXorFunc, "Recovery Xr",
-rf_RecoveryXorFunc, "Recovery Xr"};
-
-static void rf_RecurPrintDAG(RF_DagNode_t *, int, int);
-static void rf_PrintDAG(RF_DagHeader_t *);
-static int
-rf_ValidateBranch(RF_DagNode_t *, int *, int *,
- RF_DagNode_t **, int);
-static void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int);
-static void rf_ValidateVisitedBits(RF_DagHeader_t *);
-
-/******************************************************************************
- *
- * InitNode - initialize a dag node
- *
- * the size of the propList array is always the same as that of the
- * successors array.
- *
- *****************************************************************************/
-void
-rf_InitNode(
-    RF_DagNode_t * node,
-    RF_NodeStatus_t initstatus,
-    int commit,
-    int (*doFunc) (RF_DagNode_t * node),
-    int (*undoFunc) (RF_DagNode_t * node),
-    int (*wakeFunc) (RF_DagNode_t * node, int status),
-    int nSucc,
-    int nAnte,
-    int nParam,
-    int nResult,
-    RF_DagHeader_t * hdr,
-    char *name,
-    RF_AllocListElem_t * alist)
-{
-	void **slots;		/* backing store for all pointer arrays */
-	void **cursor;		/* next unassigned slot */
-	int nslots;
-
-	if (nAnte > RF_MAX_ANTECEDENTS)
-		RF_PANIC();
-
-	/* identity and behaviour */
-	node->name = name;
-	node->dagHdr = hdr;
-	node->status = initstatus;
-	node->commitNode = commit;
-	node->doFunc = doFunc;
-	node->undoFunc = undoFunc;
-	node->wakeFunc = wakeFunc;
-
-	/* counters and links */
-	node->numParams = nParam;
-	node->numResults = nResult;
-	node->numSuccedents = nSucc;
-	node->numAntecedents = nAnte;
-	node->numAntDone = 0;
-	node->next = NULL;
-	node->visited = 0;
-
-	/*
-	 * One pointer region serves four arrays, laid out back to back:
-	 * succedents (nSucc), antecedents (nAnte), results (nResult) and
-	 * propList (nSucc again).
-	 *
-	 * Small nodes use the scribble space embedded in the node
-	 * (dag_ptrs) instead of allocating.  RF_DAG_PTRCACHESIZE is a
-	 * "common case" size: ideally nptrs rarely exceeds it by just a
-	 * little (which forces an allocation for a few extra slots) and is
-	 * rarely much smaller than it (which wastes embedded space).
-	 */
-	nslots = nSucc + nAnte + nResult + nSucc;
-	if (nslots > RF_DAG_PTRCACHESIZE) {
-		RF_CallocAndAdd(slots, nslots, sizeof(void *), (void **), alist);
-	} else {
-		slots = (void **) node->dag_ptrs;
-	}
-
-	cursor = slots;
-	node->succedents = (nSucc) ? (RF_DagNode_t **) cursor : NULL;
-	cursor += nSucc;
-	node->antecedents = (nAnte) ? (RF_DagNode_t **) cursor : NULL;
-	cursor += nAnte;
-	node->results = (nResult) ? (void **) cursor : NULL;
-	cursor += nResult;
-	node->propList = (nSucc) ? (RF_PropHeader_t **) cursor : NULL;
-
-	/* parameters follow the same embedded-cache-or-allocate scheme */
-	if (!nParam) {
-		node->params = NULL;
-	} else if (nParam <= RF_DAG_PARAMCACHESIZE) {
-		node->params = (RF_DagParam_t *) node->dag_params;
-	} else {
-		RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist);
-	}
-}
-
-
-
-/******************************************************************************
- *
- * allocation and deallocation routines
- *
- *****************************************************************************/
-
-/*
- * Free a chain of DAG headers linked through ->next.  For each header this
- * releases its memory chunks and extra chunks, its allocation list and its
- * access-stripe-map list, then returns the header itself to the freelist.
- */
-void
-rf_FreeDAG(dag_h)
-	RF_DagHeader_t *dag_h;
-{
-	RF_AccessStripeMapHeader_t *asmap, *t_asmap;
-	RF_DagHeader_t *nextDag;
-	int i;
-
-	while (dag_h) {
-		nextDag = dag_h->next;
-		/*
-		 * Test the index before reading the slot.  The previous
-		 * order (slot first, bound second) read memChunk[RF_MAXCHUNKS]
-		 * whenever every slot was in use.
-		 */
-		for (i = 0; i < RF_MAXCHUNKS && dag_h->memChunk[i]; i++) {
-			/* release mem chunks */
-			rf_ReleaseMemChunk(dag_h->memChunk[i]);
-			dag_h->memChunk[i] = NULL;
-		}
-
-		RF_ASSERT(i == dag_h->chunkIndex);
-		if (dag_h->xtraChunkCnt > 0) {
-			/* free xtraMemChunks; same bound-before-read ordering */
-			for (i = 0; i < dag_h->xtraChunkIndex && dag_h->xtraMemChunk[i]; i++) {
-				rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]);
-				dag_h->xtraMemChunk[i] = NULL;
-			}
-			RF_ASSERT(i == dag_h->xtraChunkIndex);
-			/* free ptrs to xtraMemChunks */
-			RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *));
-		}
-		rf_FreeAllocList(dag_h->allocList);
-		/* fetch ->next before freeing each map entry */
-		for (asmap = dag_h->asmList; asmap;) {
-			t_asmap = asmap;
-			asmap = asmap->next;
-			rf_FreeAccessStripeMap(t_asmap);
-		}
-		rf_FreeDAGHeader(dag_h);
-		dag_h = nextDag;
-	}
-}
-
-/*
- * Allocate a zeroed RF_PropHeader_t on allocList, fill in the result and
- * parameter numbers, and link it in front of `next'.  dag_h is not used.
- */
-RF_PropHeader_t *
-rf_MakePropListEntry(
-    RF_DagHeader_t * dag_h,
-    int resultNum,
-    int paramNum,
-    RF_PropHeader_t * next,
-    RF_AllocListElem_t * allocList)
-{
-	RF_PropHeader_t *entry;
-
-	RF_CallocAndAdd(entry, 1, sizeof(RF_PropHeader_t),
-	    (RF_PropHeader_t *), allocList);
-	entry->next = next;
-	entry->paramNum = paramNum;
-	entry->resultNum = resultNum;
-	return (entry);
-}
-
-static RF_FreeList_t *rf_dagh_freelist;
-
-#define RF_MAX_FREE_DAGH 128
-#define RF_DAGH_INC 16
-#define RF_DAGH_INITIAL 32
-
-static void rf_ShutdownDAGs(void *);
-
-/* Shutdown hook: tear down the DAG-header freelist.  The argument is unused. */
-static void
-rf_ShutdownDAGs(void *ignored)
-{
-	RF_FREELIST_DESTROY(rf_dagh_freelist, next, (RF_DagHeader_t *));
-}
-
-/*
- * Create the DAG-header freelist, register its shutdown hook on listp and
- * prime it with RF_DAGH_INITIAL entries.  Returns 0 on success, ENOMEM if
- * the freelist cannot be created, or the rf_ShutdownCreate() error code
- * (after destroying the freelist again).
- */
-int
-rf_ConfigureDAGs(RF_ShutdownList_t **listp)
-{
-	int err;
-
-	RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH,
-	    RF_DAGH_INC, sizeof(RF_DagHeader_t));
-	if (rf_dagh_freelist == NULL)
-		return (ENOMEM);
-
-	err = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL);
-	if (err == 0) {
-		RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL, next,
-		    (RF_DagHeader_t *));
-		return (0);
-	}
-
-	/* registration failed: report it and undo the freelist creation */
-	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
-	    __FILE__, __LINE__, err);
-	rf_ShutdownDAGs(NULL);
-	return (err);
-}
-
-/* Take a DAG header from the freelist and zero it; the result may be NULL. */
-RF_DagHeader_t *
-rf_AllocDAGHeader(void)
-{
-	RF_DagHeader_t *hdr;
-
-	RF_FREELIST_GET(rf_dagh_freelist, hdr, next, (RF_DagHeader_t *));
-	if (hdr == NULL)
-		return (hdr);
-	bzero((char *) hdr, sizeof(RF_DagHeader_t));
-	return (hdr);
-}
-
-/* Hand a DAG header back to the freelist it was taken from. */
-void
-rf_FreeDAGHeader(RF_DagHeader_t *dh)
-{
-	RF_FREELIST_FREE(rf_dagh_freelist, dh, next);
-}
-/*
- * Allocate a buffer large enough for the access described by pda
- * (numSector shifted left by the array's log2 bytes-per-sector) and record
- * it on allocList.  dag_h is not used.
- */
-void *
-rf_AllocBuffer(
-    RF_Raid_t * raidPtr,
-    RF_DagHeader_t * dag_h,
-    RF_PhysDiskAddr_t * pda,
-    RF_AllocListElem_t * allocList)
-{
-	char *buf;
-
-	RF_MallocAndAdd(buf, pda->numSector << raidPtr->logBytesPerSector,
-	    (char *), allocList);
-	return ((void *) buf);
-}
-/******************************************************************************
- *
- * debug routines
- *
- *****************************************************************************/
-
-/* Short printable name for a node's status; "?" for states not listed here. */
-char *
-rf_NodeStatusString(RF_DagNode_t *node)
-{
-	char *label;
-
-	switch (node->status) {
-	case rf_wait:
-		label = "wait";
-		break;
-	case rf_fired:
-		label = "fired";
-		break;
-	case rf_good:
-		label = "good";
-		break;
-	case rf_bad:
-		label = "bad";
-		break;
-	default:
-		label = "?";
-		break;
-	}
-	return (label);
-}
-
-/*
- * Print a one-line (or, for XOR and parity-log nodes, multi-line)
- * description of a node, chosen by comparing its doFunc against the known
- * node functions.  Unrecognised functions print "?".
- */
-void
-rf_PrintNodeInfoString(RF_DagNode_t *node)
-{
-	int (*fn) (RF_DagNode_t *) = node->doFunc;
-	RF_PhysDiskAddr_t *addr;
-	void *data;
-	char *tag;
-	int k, wantLock, wantUnlock;
-
-	if ((fn == rf_DiskReadFunc) || (fn == rf_DiskWriteFunc)
-	    || (fn == rf_DiskReadMirrorIdleFunc)
-	    || (fn == rf_DiskReadMirrorPartitionFunc)) {
-		/* disk I/O node: address, buffer and queue lock action */
-		addr = (RF_PhysDiskAddr_t *) node->params[0].p;
-		data = (void *) node->params[1].p;
-		wantLock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
-		wantUnlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
-		RF_ASSERT(!(wantLock && wantUnlock));
-		if (wantLock)
-			tag = "LOCK";
-		else if (wantUnlock)
-			tag = "UNLK";
-		else
-			tag = " ";
-		printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", addr->row, addr->col,
-		    (long) addr->startSector, (int) addr->numSector, (long) data,
-		    tag);
-	} else if (fn == rf_DiskUnlockFunc) {
-		/* unlock node: disk position and lock action only */
-		addr = (RF_PhysDiskAddr_t *) node->params[0].p;
-		wantLock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
-		wantUnlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
-		RF_ASSERT(!(wantLock && wantUnlock));
-		if (wantLock)
-			tag = "LOCK";
-		else if (wantUnlock)
-			tag = "UNLK";
-		else
-			tag = "nop";
-		printf("r %d c %d %s\n", addr->row, addr->col,
-		    tag);
-	} else if ((fn == rf_SimpleXorFunc) || (fn == rf_RegularXorFunc)
-	    || (fn == rf_RecoveryXorFunc)) {
-		/* XOR node: result buffer, then each (PDA, buffer) pair */
-		printf("result buf 0x%lx\n", (long) node->results[0]);
-		for (k = 0; k < node->numParams - 1; k += 2) {
-			addr = (RF_PhysDiskAddr_t *) node->params[k].p;
-			data = (void *) node->params[k + 1].p;
-			printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n",
-			    (long) data, addr->row, addr->col,
-			    (long) addr->startSector, (int) addr->numSector);
-		}
-	}
-#if RF_INCLUDE_PARITYLOGGING > 0
-	else if (fn == rf_ParityLogOverwriteFunc || fn == rf_ParityLogUpdateFunc) {
-		/* parity-log node: each (PDA, buffer) pair */
-		for (k = 0; k < node->numParams - 1; k += 2) {
-			addr = (RF_PhysDiskAddr_t *) node->params[k].p;
-			data = (void *) node->params[k + 1].p;
-			printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n",
-			    addr->row, addr->col, (long) addr->startSector,
-			    (int) addr->numSector, (long) data);
-		}
-	}
-#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
-	else if ((fn == rf_TerminateFunc) || (fn == rf_NullNodeFunc)) {
-		printf("\n");
-	} else {
-		printf("?\n");
-	}
-}
-
-/*
- * Print `node' and then every successor not yet printed, depth first.
- * `unvisited' is the value the visited flag has on nodes that have not been
- * printed yet; each printed node gets the opposite value.
- */
-static void
-rf_RecurPrintDAG(RF_DagNode_t *node, int depth, int unvisited)
-{
-	char *kind;
-	int k;
-
-	/* flip this node's flag so it is not printed a second time */
-	node->visited = !unvisited;
-
-	printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth,
-	    node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node),
-	    node->numSuccedents, node->numSuccFired, node->numSuccDone,
-	    node->numAntecedents, node->numAntDone, node->numParams, node->numResults);
-
-	/* successor numbers, space separated */
-	for (k = 0; k < node->numSuccedents; k++) {
-		printf("%d%s", node->succedents[k]->nodeNum,
-		    ((k + 1 == node->numSuccedents) ? "\0" : " "));
-	}
-
-	/* antecedent numbers, each tagged with its dependence type */
-	printf("} A{");
-	for (k = 0; k < node->numAntecedents; k++) {
-		switch (node->antType[k]) {
-		case rf_trueData:
-			kind = "T";
-			break;
-		case rf_antiData:
-			kind = "A";
-			break;
-		case rf_outputData:
-			kind = "O";
-			break;
-		case rf_control:
-			kind = "C";
-			break;
-		default:
-			kind = "?";
-			break;
-		}
-		printf("%d(%s)%s", node->antecedents[k]->nodeNum, kind, (k + 1 == node->numAntecedents) ? "\0" : " ");
-	}
-	printf("}; ");
-	rf_PrintNodeInfoString(node);
-
-	/* descend into successors that still carry the "unvisited" marker */
-	for (k = 0; k < node->numSuccedents; k++) {
-		if (node->succedents[k]->visited != unvisited)
-			continue;
-		rf_RecurPrintDAG(node->succedents[k], depth + 1, unvisited);
-	}
-}
-
-static void
-rf_PrintDAG(dag_h)
- RF_DagHeader_t *dag_h;
-{
- int unvisited, i;
- char *status;
-
- /* set dag status */
- switch (dag_h->status) {
- case rf_enable:
- status = "enable";
- break;
- case rf_rollForward:
- status = "rollForward";
- break;
- case rf_rollBackward:
- status = "rollBackward";
- break;
- default:
- status = "illegal!";
- break;
- }
- /* find out if visited bits are currently set or clear */
- unvisited = dag_h->succedents[0]->visited;
-
- printf("DAG type: %s\n", dag_h->creator);
- printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n");
- printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum,
- status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits);
- for (i = 0; i < dag_h->numSuccedents; i++) {
- printf("%d%s", dag_h->succedents[i]->nodeNum,
- ((i == dag_h->numSuccedents - 1) ? "\0" : " "));
- }
- printf("};\n");
- for (i = 0; i < dag_h->numSuccedents; i++) {
- if (dag_h->succedents[i]->visited == unvisited)
- rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited);
- }
-}
-/* assigns node numbers */
-int
-rf_AssignNodeNums(RF_DagHeader_t * dag_h)
-{
- int unvisited, i, nnum;
- RF_DagNode_t *node;
-
- nnum = 0;
- unvisited = dag_h->succedents[0]->visited;
-
- dag_h->nodeNum = nnum++;
- for (i = 0; i < dag_h->numSuccedents; i++) {
- node = dag_h->succedents[i];
- if (node->visited == unvisited) {
- nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited);
- }
- }
- return (nnum);
-}
-
-int
-rf_RecurAssignNodeNums(node, num, unvisited)
- RF_DagNode_t *node;
- int num;
- int unvisited;
-{
- int i;
-
- node->visited = (unvisited) ? 0 : 1;
-
- node->nodeNum = num++;
- for (i = 0; i < node->numSuccedents; i++) {
- if (node->succedents[i]->visited == unvisited) {
- num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited);
- }
- }
- return (num);
-}
-/* set the header pointers in each node to "newptr" */
-void
-rf_ResetDAGHeaderPointers(dag_h, newptr)
- RF_DagHeader_t *dag_h;
- RF_DagHeader_t *newptr;
-{
- int i;
- for (i = 0; i < dag_h->numSuccedents; i++)
- if (dag_h->succedents[i]->dagHdr != newptr)
- rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr);
-}
-
-void
-rf_RecurResetDAGHeaderPointers(node, newptr)
- RF_DagNode_t *node;
- RF_DagHeader_t *newptr;
-{
- int i;
- node->dagHdr = newptr;
- for (i = 0; i < node->numSuccedents; i++)
- if (node->succedents[i]->dagHdr != newptr)
- rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr);
-}
-
-
-void
-rf_PrintDAGList(RF_DagHeader_t * dag_h)
-{
- int i = 0;
-
- for (; dag_h; dag_h = dag_h->next) {
- rf_AssignNodeNums(dag_h);
- printf("\n\nDAG %d IN LIST:\n", i++);
- rf_PrintDAG(dag_h);
- }
-}
-
-static int
-rf_ValidateBranch(node, scount, acount, nodes, unvisited)
- RF_DagNode_t *node;
- int *scount;
- int *acount;
- RF_DagNode_t **nodes;
- int unvisited;
-{
- int i, retcode = 0;
-
- /* construct an array of node pointers indexed by node num */
- node->visited = (unvisited) ? 0 : 1;
- nodes[node->nodeNum] = node;
-
- if (node->next != NULL) {
- printf("INVALID DAG: next pointer in node is not NULL\n");
- retcode = 1;
- }
- if (node->status != rf_wait) {
- printf("INVALID DAG: Node status is not wait\n");
- retcode = 1;
- }
- if (node->numAntDone != 0) {
- printf("INVALID DAG: numAntDone is not zero\n");
- retcode = 1;
- }
- if (node->doFunc == rf_TerminateFunc) {
- if (node->numSuccedents != 0) {
- printf("INVALID DAG: Terminator node has succedents\n");
- retcode = 1;
- }
- } else {
- if (node->numSuccedents == 0) {
- printf("INVALID DAG: Non-terminator node has no succedents\n");
- retcode = 1;
- }
- }
- for (i = 0; i < node->numSuccedents; i++) {
- if (!node->succedents[i]) {
- printf("INVALID DAG: succedent %d of node %s is NULL\n", i, node->name);
- retcode = 1;
- }
- scount[node->succedents[i]->nodeNum]++;
- }
- for (i = 0; i < node->numAntecedents; i++) {
- if (!node->antecedents[i]) {
- printf("INVALID DAG: antecedent %d of node %s is NULL\n", i, node->name);
- retcode = 1;
- }
- acount[node->antecedents[i]->nodeNum]++;
- }
- for (i = 0; i < node->numSuccedents; i++) {
- if (node->succedents[i]->visited == unvisited) {
- if (rf_ValidateBranch(node->succedents[i], scount,
- acount, nodes, unvisited)) {
- retcode = 1;
- }
- }
- }
- return (retcode);
-}
-
-static void
-rf_ValidateBranchVisitedBits(node, unvisited, rl)
- RF_DagNode_t *node;
- int unvisited;
- int rl;
-{
- int i;
-
- RF_ASSERT(node->visited == unvisited);
- for (i = 0; i < node->numSuccedents; i++) {
- if (node->succedents[i] == NULL) {
- printf("node=%lx node->succedents[%d] is NULL\n", (long) node, i);
- RF_ASSERT(0);
- }
- rf_ValidateBranchVisitedBits(node->succedents[i], unvisited, rl + 1);
- }
-}
-/* NOTE: never call this on a big dag, because it is exponential
- * in execution time
- */
-static void
-rf_ValidateVisitedBits(dag)
- RF_DagHeader_t *dag;
-{
- int i, unvisited;
-
- unvisited = dag->succedents[0]->visited;
-
- for (i = 0; i < dag->numSuccedents; i++) {
- if (dag->succedents[i] == NULL) {
- printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i);
- RF_ASSERT(0);
- }
- rf_ValidateBranchVisitedBits(dag->succedents[i], unvisited, 0);
- }
-}
-/* validate a DAG. _at entry_ verify that:
- * -- numNodesCompleted is zero
- * -- node queue is null
- * -- dag status is rf_enable
- * -- next pointer is null on every node
- * -- all nodes have status wait
- * -- numAntDone is zero in all nodes
- * -- terminator node has zero successors
- * -- no other node besides terminator has zero successors
- * -- no successor or antecedent pointer in a node is NULL
- * -- number of times that each node appears as a successor of another node
- * is equal to the antecedent count on that node
- * -- number of times that each node appears as an antecedent of another node
- * is equal to the succedent count on that node
- * -- what else?
- */
-int
-rf_ValidateDAG(dag_h)
- RF_DagHeader_t *dag_h;
-{
- int i, nodecount;
- int *scount, *acount;/* per-node successor and antecedent counts */
- RF_DagNode_t **nodes; /* array of ptrs to nodes in dag */
- int retcode = 0;
- int unvisited;
- int commitNodeCount = 0;
-
- if (rf_validateVisitedDebug)
- rf_ValidateVisitedBits(dag_h);
-
- if (dag_h->numNodesCompleted != 0) {
- printf("INVALID DAG: num nodes completed is %d, should be 0\n", dag_h->numNodesCompleted);
- retcode = 1;
- goto validate_dag_bad;
- }
- if (dag_h->status != rf_enable) {
- printf("INVALID DAG: not enabled\n");
- retcode = 1;
- goto validate_dag_bad;
- }
- if (dag_h->numCommits != 0) {
- printf("INVALID DAG: numCommits != 0 (%d)\n", dag_h->numCommits);
- retcode = 1;
- goto validate_dag_bad;
- }
- if (dag_h->numSuccedents != 1) {
- /* currently, all dags must have only one succedent */
- printf("INVALID DAG: numSuccedents !1 (%d)\n", dag_h->numSuccedents);
- retcode = 1;
- goto validate_dag_bad;
- }
- nodecount = rf_AssignNodeNums(dag_h);
-
- unvisited = dag_h->succedents[0]->visited;
-
- RF_Calloc(scount, nodecount, sizeof(int), (int *));
- RF_Calloc(acount, nodecount, sizeof(int), (int *));
- RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **));
- for (i = 0; i < dag_h->numSuccedents; i++) {
- if ((dag_h->succedents[i]->visited == unvisited)
- && rf_ValidateBranch(dag_h->succedents[i], scount,
- acount, nodes, unvisited)) {
- retcode = 1;
- }
- }
- /* start at 1 to skip the header node */
- for (i = 1; i < nodecount; i++) {
- if (nodes[i]->commitNode)
- commitNodeCount++;
- if (nodes[i]->doFunc == NULL) {
- printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name);
- retcode = 1;
- goto validate_dag_out;
- }
- if (nodes[i]->undoFunc == NULL) {
- printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name);
- retcode = 1;
- goto validate_dag_out;
- }
- if (nodes[i]->numAntecedents != scount[nodes[i]->nodeNum]) {
- printf("INVALID DAG: node %s has %d antecedents but appears as a succedent %d times\n",
- nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]);
- retcode = 1;
- goto validate_dag_out;
- }
- if (nodes[i]->numSuccedents != acount[nodes[i]->nodeNum]) {
- printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n",
- nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]);
- retcode = 1;
- goto validate_dag_out;
- }
- }
-
- if (dag_h->numCommitNodes != commitNodeCount) {
- printf("INVALID DAG: incorrect commit node count. hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n",
- dag_h->numCommitNodes, commitNodeCount);
- retcode = 1;
- goto validate_dag_out;
- }
-validate_dag_out:
- RF_Free(scount, nodecount * sizeof(int));
- RF_Free(acount, nodecount * sizeof(int));
- RF_Free(nodes, nodecount * sizeof(RF_DagNode_t *));
- if (retcode)
- rf_PrintDAGList(dag_h);
-
- if (rf_validateVisitedDebug)
- rf_ValidateVisitedBits(dag_h);
-
- return (retcode);
-
-validate_dag_bad:
- rf_PrintDAGList(dag_h);
- return (retcode);
-}
-
-
-/******************************************************************************
- *
- * misc construction routines
- *
- *****************************************************************************/
-
-void
-rf_redirect_asm(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap)
-{
- int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 1 : 0;
- int row = asmap->physInfo->row;
- int fcol = raidPtr->reconControl[row]->fcol;
- int srow = raidPtr->reconControl[row]->spareRow;
- int scol = raidPtr->reconControl[row]->spareCol;
- RF_PhysDiskAddr_t *pda;
-
- RF_ASSERT(raidPtr->status[row] == rf_rs_reconstructing);
- for (pda = asmap->physInfo; pda; pda = pda->next) {
- if (pda->col == fcol) {
- if (rf_dagDebug) {
- if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap,
- pda->startSector)) {
- RF_PANIC();
- }
- }
- /* printf("Remapped data for large write\n"); */
- if (ds) {
- raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress,
- &pda->row, &pda->col, &pda->startSector, RF_REMAP);
- } else {
- pda->row = srow;
- pda->col = scol;
- }
- }
- }
- for (pda = asmap->parityInfo; pda; pda = pda->next) {
- if (pda->col == fcol) {
- if (rf_dagDebug) {
- if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) {
- RF_PANIC();
- }
- }
- }
- if (ds) {
- (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
- } else {
- pda->row = srow;
- pda->col = scol;
- }
- }
-}
-
-
-/* this routine allocates read buffers and generates stripe maps for the
- * regions of the array from the start of the stripe to the start of the
- * access, and from the end of the access to the end of the stripe. It also
- * computes and returns the number of DAG nodes needed to read all this data.
- * Note that this routine does the wrong thing if the access is fully
- * contained within one stripe unit, so we RF_ASSERT against this case at the
- * start.
- */
-void
-rf_MapUnaccessedPortionOfStripe(
- RF_Raid_t * raidPtr,
- RF_RaidLayout_t * layoutPtr,/* in: layout information */
- RF_AccessStripeMap_t * asmap, /* in: access stripe map */
- RF_DagHeader_t * dag_h, /* in: header of the dag to create */
- RF_AccessStripeMapHeader_t ** new_asm_h, /* in: ptr to array of 2
- * headers, to be filled in */
- int *nRodNodes, /* out: num nodes to be generated to read
- * unaccessed data */
- char **sosBuffer, /* out: pointers to newly allocated buffer */
- char **eosBuffer,
- RF_AllocListElem_t * allocList)
-{
- RF_RaidAddr_t sosRaidAddress, eosRaidAddress;
- RF_SectorNum_t sosNumSector, eosNumSector;
-
- RF_ASSERT(asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol / 2));
- /* generate an access map for the region of the array from start of
- * stripe to start of access */
- new_asm_h[0] = new_asm_h[1] = NULL;
- *nRodNodes = 0;
- if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) {
- sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
- sosNumSector = asmap->raidAddress - sosRaidAddress;
- RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList);
- new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP);
- new_asm_h[0]->next = dag_h->asmList;
- dag_h->asmList = new_asm_h[0];
- *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
-
- RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL);
- /* we're totally within one stripe here */
- if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
- rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap);
- }
- /* generate an access map for the region of the array from end of
- * access to end of stripe */
- if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) {
- eosRaidAddress = asmap->endRaidAddress;
- eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress;
- RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, eosNumSector), (char *), allocList);
- new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP);
- new_asm_h[1]->next = dag_h->asmList;
- dag_h->asmList = new_asm_h[1];
- *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
-
- RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL);
- /* we're totally within one stripe here */
- if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
- rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap);
- }
-}
-
-
-
-/* returns non-zero if the indicated ranges of stripe unit offsets overlap */
-int
-rf_PDAOverlap(
- RF_RaidLayout_t * layoutPtr,
- RF_PhysDiskAddr_t * src,
- RF_PhysDiskAddr_t * dest)
-{
- RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector);
- RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector);
- /* use -1 to be sure we stay within SU */
- RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1);
- RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1);
- return ((RF_MAX(soffs, doffs) <= RF_MIN(send, dend)) ? 1 : 0);
-}
-
-
-/* GenerateFailedAccessASMs
- *
- * this routine figures out what portion of the stripe needs to be read
- * to effect the degraded read or write operation. It's primary function
- * is to identify everything required to recover the data, and then
- * eliminate anything that is already being accessed by the user.
- *
- * The main result is two new ASMs, one for the region from the start of the
- * stripe to the start of the access, and one for the region from the end of
- * the access to the end of the stripe. These ASMs describe everything that
- * needs to be read to effect the degraded access. Other results are:
- * nXorBufs -- the total number of buffers that need to be XORed together to
- * recover the lost data,
- * rpBufPtr -- ptr to a newly-allocated buffer to hold the parity. If NULL
- * at entry, not allocated.
- * overlappingPDAs --
- * describes which of the non-failed PDAs in the user access
- * overlap data that needs to be read to effect recovery.
- * overlappingPDAs[i]==1 if and only if, neglecting the failed
- * PDA, the ith pda in the input asm overlaps data that needs
- * to be read for recovery.
- */
- /* in: asm - ASM for the actual access, one stripe only */
- /* in: faildPDA - which component of the access has failed */
- /* in: dag_h - header of the DAG we're going to create */
- /* out: new_asm_h - the two new ASMs */
- /* out: nXorBufs - the total number of xor bufs required */
- /* out: rpBufPtr - a buffer for the parity read */
-void
-rf_GenerateFailedAccessASMs(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_PhysDiskAddr_t * failedPDA,
- RF_DagHeader_t * dag_h,
- RF_AccessStripeMapHeader_t ** new_asm_h,
- int *nXorBufs,
- char **rpBufPtr,
- char *overlappingPDAs,
- RF_AllocListElem_t * allocList)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-
- /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */
- RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr;
-
- RF_SectorCount_t numSect[2], numParitySect;
- RF_PhysDiskAddr_t *pda;
- char *rdBuf, *bufP;
- int foundit, i;
-
- bufP = NULL;
- foundit = 0;
- /* first compute the following raid addresses: start of stripe,
- * (sosAddr) MIN(start of access, start of failed SU), (sosEndAddr)
- * MAX(end of access, end of failed SU), (eosStartAddr) end of
- * stripe (i.e. start of next stripe) (eosAddr) */
- sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
- sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->raidAddress));
- eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress));
- eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress);
-
- /* now generate access stripe maps for each of the above regions of
- * the stripe. Use a dummy (NULL) buf ptr for now */
-
- new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL, RF_DONT_REMAP) : NULL;
- new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL, RF_DONT_REMAP) : NULL;
-
- /* walk through the PDAs and range-restrict each SU to the region of
- * the SU touched on the failed PDA. also compute total data buffer
- * space requirements in this step. Ignore the parity for now. */
-
- numSect[0] = numSect[1] = 0;
- if (new_asm_h[0]) {
- new_asm_h[0]->next = dag_h->asmList;
- dag_h->asmList = new_asm_h[0];
- for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) {
- rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0);
- numSect[0] += pda->numSector;
- }
- }
- if (new_asm_h[1]) {
- new_asm_h[1]->next = dag_h->asmList;
- dag_h->asmList = new_asm_h[1];
- for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) {
- rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0);
- numSect[1] += pda->numSector;
- }
- }
- numParitySect = failedPDA->numSector;
-
- /* allocate buffer space for the data & parity we have to read to
- * recover from the failure */
-
- if (numSect[0] + numSect[1] + ((rpBufPtr) ? numParitySect : 0)) { /* don't allocate parity
- * buf if not needed */
- RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (char *), allocList);
- bufP = rdBuf;
- if (rf_degDagDebug)
- printf("Newly allocated buffer (%d bytes) is 0x%lx\n",
- (int) rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (unsigned long) bufP);
- }
- /* now walk through the pdas one last time and assign buffer pointers
- * (ugh!). Again, ignore the parity. also, count nodes to find out
- * how many bufs need to be xored together */
- (*nXorBufs) = 1; /* in read case, 1 is for parity. In write
- * case, 1 is for failed data */
- if (new_asm_h[0]) {
- for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) {
- pda->bufPtr = bufP;
- bufP += rf_RaidAddressToByte(raidPtr, pda->numSector);
- }
- *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
- }
- if (new_asm_h[1]) {
- for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) {
- pda->bufPtr = bufP;
- bufP += rf_RaidAddressToByte(raidPtr, pda->numSector);
- }
- (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
- }
- if (rpBufPtr)
- *rpBufPtr = bufP; /* the rest of the buffer is for
- * parity */
-
- /* the last step is to figure out how many more distinct buffers need
- * to get xor'd to produce the missing unit. there's one for each
- * user-data read node that overlaps the portion of the failed unit
- * being accessed */
-
- for (foundit = i = 0, pda = asmap->physInfo; pda; i++, pda = pda->next) {
- if (pda == failedPDA) {
- i--;
- foundit = 1;
- continue;
- }
- if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) {
- overlappingPDAs[i] = 1;
- (*nXorBufs)++;
- }
- }
- if (!foundit) {
- RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n");
- RF_ASSERT(0);
- }
- if (rf_degDagDebug) {
- if (new_asm_h[0]) {
- printf("First asm:\n");
- rf_PrintFullAccessStripeMap(new_asm_h[0], 1);
- }
- if (new_asm_h[1]) {
- printf("Second asm:\n");
- rf_PrintFullAccessStripeMap(new_asm_h[1], 1);
- }
- }
-}
-
-
-/* adjusts the offset and number of sectors in the destination pda so that
- * it covers at most the region of the SU covered by the source PDA. This
- * is exclusively a restriction: the number of sectors indicated by the
- * target PDA can only shrink.
- *
- * For example: s = sectors within SU indicated by source PDA
- * d = sectors within SU indicated by dest PDA
- * r = results, stored in dest PDA
- *
- * |--------------- one stripe unit ---------------------|
- * | sssssssssssssssssssssssssssssssss |
- * | ddddddddddddddddddddddddddddddddddddddddddddd |
- * | rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr |
- *
- * Another example:
- *
- * |--------------- one stripe unit ---------------------|
- * | sssssssssssssssssssssssssssssssss |
- * | ddddddddddddddddddddddd |
- * | rrrrrrrrrrrrrrrr |
- *
- */
-void
-rf_RangeRestrictPDA(
- RF_Raid_t * raidPtr,
- RF_PhysDiskAddr_t * src,
- RF_PhysDiskAddr_t * dest,
- int dobuffer,
- int doraidaddr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector);
- RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector);
- RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); /* use -1 to be sure we
- * stay within SU */
- RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1);
- RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */
-
- dest->startSector = subAddr + RF_MAX(soffs, doffs);
- dest->numSector = subAddr + RF_MIN(send, dend) + 1 - dest->startSector;
-
- if (dobuffer)
- dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0;
- if (doraidaddr) {
- dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) +
- rf_StripeUnitOffset(layoutPtr, dest->startSector);
- }
-}
-/*
- * Want the highest of these primes to be the largest one
- * less than the max expected number of columns (won't hurt
- * to be too small or too large, but won't be optimal, either)
- * --jimz
- */
-#define NLOWPRIMES 8
-static int lowprimes[NLOWPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19};
-/*****************************************************************************
- * compute the workload shift factor. (chained declustering)
- *
- * return nonzero if access should shift to secondary, otherwise,
- * access is to primary
- *****************************************************************************/
-int
-rf_compute_workload_shift(
- RF_Raid_t * raidPtr,
- RF_PhysDiskAddr_t * pda)
-{
- /*
- * variables:
- * d = column of disk containing primary
- * f = column of failed disk
- * n = number of disks in array
- * sd = "shift distance" (number of columns that d is to the right of f)
- * row = row of array the access is in
- * v = numerator of redirection ratio
- * k = denominator of redirection ratio
- */
- RF_RowCol_t d, f, sd, row, n;
- int k, v, ret, i;
-
- row = pda->row;
- n = raidPtr->numCol;
-
- /* assign column of primary copy to d */
- d = pda->col;
-
- /* assign column of dead disk to f */
- for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) && (f < n)); f++);
-
- RF_ASSERT(f < n);
- RF_ASSERT(f != d);
-
- sd = (f > d) ? (n + d - f) : (d - f);
- RF_ASSERT(sd < n);
-
- /*
- * v of every k accesses should be redirected
- *
- * v/k := (n-1-sd)/(n-1)
- */
- v = (n - 1 - sd);
- k = (n - 1);
-
-#if 1
- /*
- * XXX
- * Is this worth it?
- *
- * Now reduce the fraction, by repeatedly factoring
- * out primes (just like they teach in elementary school!)
- */
- for (i = 0; i < NLOWPRIMES; i++) {
- if (lowprimes[i] > v)
- break;
- while (((v % lowprimes[i]) == 0) && ((k % lowprimes[i]) == 0)) {
- v /= lowprimes[i];
- k /= lowprimes[i];
- }
- }
-#endif
-
- raidPtr->hist_diskreq[row][d]++;
- if (raidPtr->hist_diskreq[row][d] > v) {
- ret = 0; /* do not redirect */
- } else {
- ret = 1; /* redirect */
- }
-
-#if 0
- printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret,
- raidPtr->hist_diskreq[row][d]);
-#endif
-
- if (raidPtr->hist_diskreq[row][d] >= k) {
- /* reset counter */
- raidPtr->hist_diskreq[row][d] = 0;
- }
- return (ret);
-}
-/*
- * Disk selection routines
- */
-
-/*
- * Selects the disk with the shortest queue from a mirror pair.
- * Both the disk I/Os queued in RAIDframe as well as those at the physical
- * disk are counted as members of the "queue"
- */
-void
-rf_SelectMirrorDiskIdle(RF_DagNode_t * node)
-{
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
- RF_RowCol_t rowData, colData, rowMirror, colMirror;
- int dataQueueLength, mirrorQueueLength, usemirror;
- RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p;
- RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p;
- RF_PhysDiskAddr_t *tmp_pda;
- RF_RaidDisk_t **disks = raidPtr->Disks;
- RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
-
- /* return the [row col] of the disk with the shortest queue */
- rowData = data_pda->row;
- colData = data_pda->col;
- rowMirror = mirror_pda->row;
- colMirror = mirror_pda->col;
- dataQueue = &(dqs[rowData][colData]);
- mirrorQueue = &(dqs[rowMirror][colMirror]);
-
-#ifdef RF_LOCK_QUEUES_TO_READ_LEN
- RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
-#endif /* RF_LOCK_QUEUES_TO_READ_LEN */
- dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding;
-#ifdef RF_LOCK_QUEUES_TO_READ_LEN
- RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
- RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
-#endif /* RF_LOCK_QUEUES_TO_READ_LEN */
- mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding;
-#ifdef RF_LOCK_QUEUES_TO_READ_LEN
- RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
-#endif /* RF_LOCK_QUEUES_TO_READ_LEN */
-
- usemirror = 0;
- if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
- usemirror = 0;
- } else
- if (RF_DEAD_DISK(disks[rowData][colData].status)) {
- usemirror = 1;
- } else
- if (raidPtr->parity_good == RF_RAID_DIRTY) {
- /* Trust only the main disk */
- usemirror = 0;
- } else
- if (dataQueueLength < mirrorQueueLength) {
- usemirror = 0;
- } else
- if (mirrorQueueLength < dataQueueLength) {
- usemirror = 1;
- } else {
- /* queues are equal length. attempt
- * cleverness. */
- if (SNUM_DIFF(dataQueue->last_deq_sector, data_pda->startSector)
- <= SNUM_DIFF(mirrorQueue->last_deq_sector, mirror_pda->startSector)) {
- usemirror = 0;
- } else {
- usemirror = 1;
- }
- }
-
- if (usemirror) {
- /* use mirror (parity) disk, swap params 0 & 4 */
- tmp_pda = data_pda;
- node->params[0].p = mirror_pda;
- node->params[4].p = tmp_pda;
- } else {
- /* use data disk, leave param 0 unchanged */
- }
- /* printf("dataQueueLength %d, mirrorQueueLength
- * %d\n",dataQueueLength, mirrorQueueLength); */
-}
-/*
- * Do simple partitioning. This assumes that
- * the data and parity disks are laid out identically.
- */
-void
-rf_SelectMirrorDiskPartition(RF_DagNode_t * node)
-{
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
- RF_RowCol_t rowData, colData, rowMirror, colMirror;
- RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p;
- RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p;
- RF_PhysDiskAddr_t *tmp_pda;
- RF_RaidDisk_t **disks = raidPtr->Disks;
- RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
- int usemirror;
-
- /* return the [row col] of the disk with the shortest queue */
- rowData = data_pda->row;
- colData = data_pda->col;
- rowMirror = mirror_pda->row;
- colMirror = mirror_pda->col;
- dataQueue = &(dqs[rowData][colData]);
- mirrorQueue = &(dqs[rowMirror][colMirror]);
-
- usemirror = 0;
- if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
- usemirror = 0;
- } else
- if (RF_DEAD_DISK(disks[rowData][colData].status)) {
- usemirror = 1;
- } else
- if (raidPtr->parity_good == RF_RAID_DIRTY) {
- /* Trust only the main disk */
- usemirror = 0;
- } else
- if (data_pda->startSector <
- (disks[rowData][colData].numBlocks / 2)) {
- usemirror = 0;
- } else {
- usemirror = 1;
- }
-
- if (usemirror) {
- /* use mirror (parity) disk, swap params 0 & 4 */
- tmp_pda = data_pda;
- node->params[0].p = mirror_pda;
- node->params[4].p = tmp_pda;
- } else {
- /* use data disk, leave param 0 unchanged */
- }
-}
diff --git a/sys/dev/raidframe/rf_dagutils.h b/sys/dev/raidframe/rf_dagutils.h
deleted file mode 100644
index bad2c76..0000000
--- a/sys/dev/raidframe/rf_dagutils.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_dagutils.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*************************************************************************
- *
- * rf_dagutils.h -- header file for utility routines for manipulating DAGs
- *
- *************************************************************************/
-
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-
-#ifndef _RF__RF_DAGUTILS_H_
-#define _RF__RF_DAGUTILS_H_
-
-struct RF_RedFuncs_s {
- int (*regular) (RF_DagNode_t *);
- char *RegularName;
- int (*simple) (RF_DagNode_t *);
- char *SimpleName;
-};
-
-extern RF_RedFuncs_t rf_xorFuncs;
-extern RF_RedFuncs_t rf_xorRecoveryFuncs;
-
-void
-rf_InitNode(RF_DagNode_t * node, RF_NodeStatus_t initstatus,
- int commit,
- int (*doFunc) (RF_DagNode_t * node),
- int (*undoFunc) (RF_DagNode_t * node),
- int (*wakeFunc) (RF_DagNode_t * node, int status),
- int nSucc, int nAnte, int nParam, int nResult,
- RF_DagHeader_t * hdr, char *name, RF_AllocListElem_t * alist);
-
- void rf_FreeDAG(RF_DagHeader_t * dag_h);
-
- RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t * dag_h, int resultNum,
- int paramNum, RF_PropHeader_t * next, RF_AllocListElem_t * allocList);
-
- int rf_ConfigureDAGs(RF_ShutdownList_t ** listp);
-
- RF_DagHeader_t *rf_AllocDAGHeader(void);
-
- void rf_FreeDAGHeader(RF_DagHeader_t * dh);
-
- void *rf_AllocBuffer(RF_Raid_t * raidPtr, RF_DagHeader_t * dag_h,
- RF_PhysDiskAddr_t * pda, RF_AllocListElem_t * allocList);
-
- char *rf_NodeStatusString(RF_DagNode_t * node);
-
- void rf_PrintNodeInfoString(RF_DagNode_t * node);
-
- int rf_AssignNodeNums(RF_DagHeader_t * dag_h);
-
- int rf_RecurAssignNodeNums(RF_DagNode_t * node, int num, int unvisited);
-
- void rf_ResetDAGHeaderPointers(RF_DagHeader_t * dag_h, RF_DagHeader_t * newptr);
-
- void rf_RecurResetDAGHeaderPointers(RF_DagNode_t * node, RF_DagHeader_t * newptr);
-
- void rf_PrintDAGList(RF_DagHeader_t * dag_h);
-
- int rf_ValidateDAG(RF_DagHeader_t * dag_h);
-
- void rf_redirect_asm(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap);
-
- void rf_MapUnaccessedPortionOfStripe(RF_Raid_t * raidPtr,
- RF_RaidLayout_t * layoutPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h,
- RF_AccessStripeMapHeader_t ** new_asm_h, int *nRodNodes, char **sosBuffer,
- char **eosBuffer, RF_AllocListElem_t * allocList);
-
- int rf_PDAOverlap(RF_RaidLayout_t * layoutPtr, RF_PhysDiskAddr_t * src,
- RF_PhysDiskAddr_t * dest);
-
- void rf_GenerateFailedAccessASMs(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t * failedPDA,
- RF_DagHeader_t * dag_h, RF_AccessStripeMapHeader_t ** new_asm_h,
- int *nXorBufs, char **rpBufPtr, char *overlappingPDAs,
- RF_AllocListElem_t * allocList);
-
-/* flags used by RangeRestrictPDA */
-#define RF_RESTRICT_NOBUFFER 0
-#define RF_RESTRICT_DOBUFFER 1
-
- void rf_RangeRestrictPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * src,
- RF_PhysDiskAddr_t * dest, int dobuffer, int doraidaddr);
-
- int rf_compute_workload_shift(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda);
- void rf_SelectMirrorDiskIdle(RF_DagNode_t * node);
- void rf_SelectMirrorDiskPartition(RF_DagNode_t * node);
-
-#endif /* !_RF__RF_DAGUTILS_H_ */
diff --git a/sys/dev/raidframe/rf_debugMem.c b/sys/dev/raidframe/rf_debugMem.c
deleted file mode 100644
index a138021..0000000
--- a/sys/dev/raidframe/rf_debugMem.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/* $NetBSD: rf_debugMem.c,v 1.7 2000/01/07 03:40:59 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky, Mark Holland, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* debugMem.c: memory usage debugging stuff.
- * Malloc, Calloc, and Free are #defined everywhere
- * to do_malloc, do_calloc, and do_free.
- *
- * if RF_UTILITY is nonzero, it means were compiling one of the
- * raidframe utility programs, such as rfctrl or smd. In this
- * case, we eliminate all references to the threads package
- * and to the allocation list stuff.
- */
-
-#include <dev/raidframe/rf_types.h>
-
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_general.h>
-
-#if defined(__FreeBSD__)
-#include <sys/kernel.h>
-MALLOC_DEFINE(M_RAIDFRAME, "rfbuf", "Buffers for RAIDframe operation");
-#endif
-
-static long tot_mem_in_use = 0;
-
-/* Hash table of information about memory allocations */
-#define RF_MH_TABLESIZE 1000
-
-struct mh_struct {
- void *address;
- int size;
- int line;
- char *filen;
- char allocated;
- struct mh_struct *next;
-};
-static struct mh_struct *mh_table[RF_MH_TABLESIZE];
-RF_DECLARE_MUTEX(rf_debug_mem_mutex)
- static int mh_table_initialized = 0;
-
- static void memory_hash_insert(void *addr, int size, int line, char *filen);
- static int memory_hash_remove(void *addr, int sz);
-
-void
-rf_record_malloc(p, size, line, filen)
- void *p;
- int size, line;
- char *filen;
-{
- RF_ASSERT(size != 0);
-
- /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */
- memory_hash_insert(p, size, line, filen);
- tot_mem_in_use += size;
- /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */
- if ((long) p == rf_memDebugAddress) {
- printf("Allocate: debug address allocated from line %d file %s\n", line, filen);
- }
-}
-
-void
-rf_unrecord_malloc(p, sz)
- void *p;
- int sz;
-{
- int size;
-
- /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */
- size = memory_hash_remove(p, sz);
- tot_mem_in_use -= size;
- /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */
- if ((long) p == rf_memDebugAddress) {
- printf("Free: Found debug address\n"); /* this is really only a
- * flag line for gdb */
- }
-}
-
-void
-rf_print_unfreed()
-{
- int i, foundone = 0;
- struct mh_struct *p;
-
- for (i = 0; i < RF_MH_TABLESIZE; i++) {
- for (p = mh_table[i]; p; p = p->next)
- if (p->allocated) {
- if (!foundone)
- printf("\n\nThere are unfreed memory locations at program shutdown:\n");
- foundone = 1;
- printf("Addr 0x%lx Size %d line %d file %s\n",
- (long) p->address, p->size, p->line, p->filen);
- }
- }
- if (tot_mem_in_use) {
- printf("%ld total bytes in use\n", tot_mem_in_use);
- }
-}
-
-int
-rf_ConfigureDebugMem(listp)
- RF_ShutdownList_t **listp;
-{
- int i, rc;
-
- rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- if (rf_memDebug) {
- for (i = 0; i < RF_MH_TABLESIZE; i++)
- mh_table[i] = NULL;
- mh_table_initialized = 1;
- }
- return (0);
-}
-#define HASHADDR(_a_) ( (((unsigned long) _a_)>>3) % RF_MH_TABLESIZE )
-
-static void
-memory_hash_insert(addr, size, line, filen)
- void *addr;
- int size, line;
- char *filen;
-{
- unsigned long bucket = HASHADDR(addr);
- struct mh_struct *p;
-
- RF_ASSERT(mh_table_initialized);
-
- /* search for this address in the hash table */
- for (p = mh_table[bucket]; p && (p->address != addr); p = p->next);
- if (!p) {
- RF_Malloc(p, sizeof(struct mh_struct), (struct mh_struct *));
- RF_ASSERT(p);
- p->next = mh_table[bucket];
- mh_table[bucket] = p;
- p->address = addr;
- p->allocated = 0;
- }
- if (p->allocated) {
- printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n", (long) addr, line, filen);
- printf(" last allocated from line %d file %s\n", p->line, p->filen);
- RF_ASSERT(0);
- }
- p->size = size;
- p->line = line;
- p->filen = filen;
- p->allocated = 1;
-}
-
-static int
-memory_hash_remove(addr, sz)
- void *addr;
- int sz;
-{
- unsigned long bucket = HASHADDR(addr);
- struct mh_struct *p;
-
- RF_ASSERT(mh_table_initialized);
- for (p = mh_table[bucket]; p && (p->address != addr); p = p->next);
- if (!p) {
- printf("ERROR: freeing never-allocated address 0x%lx\n", (long) addr);
- RF_PANIC();
- }
- if (!p->allocated) {
- printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n", (long) addr, p->line, p->filen);
- RF_PANIC();
- }
- if (sz > 0 && p->size != sz) { /* you can suppress this error by
- * using a negative value as the size
- * to free */
- printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. Alloc at line %d of file %s\n", (unsigned long) addr, sz, p->size, p->line, p->filen);
- RF_PANIC();
- }
- p->allocated = 0;
- return (p->size);
-}
diff --git a/sys/dev/raidframe/rf_debugMem.h b/sys/dev/raidframe/rf_debugMem.h
deleted file mode 100644
index e6d8c60..0000000
--- a/sys/dev/raidframe/rf_debugMem.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_debugMem.h,v 1.7 1999/09/05 01:58:11 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky, Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_debugMem.h -- memory leak debugging module
- *
- * IMPORTANT: if you put the lock/unlock mutex stuff back in here, you
- * need to take it out of the routines in debugMem.c
- *
- */
-
-#ifndef _RF__RF_DEBUGMEM_H_
-#define _RF__RF_DEBUGMEM_H_
-
-#include <dev/raidframe/rf_alloclist.h>
-
-#ifdef _KERNEL
-#include <sys/types.h>
-#include <sys/malloc.h>
-
-#if defined(__FreeBSD__)
-MALLOC_DECLARE(M_RAIDFRAME);
-#endif
-
-#define RF_Malloc(_p_, _size_, _cast_) \
- { \
- _p_ = _cast_ malloc((u_long)_size_, M_RAIDFRAME, M_NOWAIT | M_ZERO); \
- if (_p_ == NULL) panic("out of memory\n"); \
- if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \
- }
-
-#define RF_MallocAndAdd(__p_, __size_, __cast_, __alist_) \
- { \
- RF_Malloc(__p_, __size_, __cast_); \
- if (__alist_) rf_AddToAllocList(__alist_, __p_, __size_); \
- }
-
-#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) \
- { \
- RF_Malloc( _p_, (_nel_) * (_elsz_), _cast_); \
- }
-
-#define RF_CallocAndAdd(__p,__nel,__elsz,__cast,__alist) \
- { \
- RF_Calloc(__p, __nel, __elsz, __cast); \
- if (__alist) rf_AddToAllocList(__alist, __p, (__nel)*(__elsz)); \
- }
-
-#define RF_Free(_p_, _sz_) \
- { \
- free((void *)(_p_), M_RAIDFRAME); \
- if (rf_memDebug) rf_unrecord_malloc(_p_, (u_int32_t) (_sz_)); \
- }
-
-#endif /* _KERNEL */
-
-void rf_record_malloc(void *p, int size, int line, char *filen);
-void rf_unrecord_malloc(void *p, int sz);
-void rf_print_unfreed(void);
-int rf_ConfigureDebugMem(RF_ShutdownList_t ** listp);
-
-#endif /* !_RF__RF_DEBUGMEM_H_ */
diff --git a/sys/dev/raidframe/rf_debugprint.c b/sys/dev/raidframe/rf_debugprint.c
deleted file mode 100644
index 02adee7..0000000
--- a/sys/dev/raidframe/rf_debugprint.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/* $NetBSD: rf_debugprint.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Code to do debug printfs. Calls to rf_debug_printf cause the corresponding
- * information to be printed to a circular buffer rather than the screen.
- * The point is to try and minimize the timing variations induced by the
- * printfs, and to capture only the printf's immediately preceding a failure.
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_options.h>
-
-#include <sys/param.h>
-
-struct RF_Entry_s {
- char *cstring;
- void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8;
-};
-/* space for 1k lines */
-#define BUFSHIFT 10
-#define BUFSIZE (1<<BUFSHIFT)
-#define BUFMASK (BUFSIZE-1)
-
-static struct RF_Entry_s rf_debugprint_buf[BUFSIZE];
-static int rf_debugprint_index = 0;
-RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex)
- int rf_ConfigureDebugPrint(listp)
- RF_ShutdownList_t **listp;
-{
- int rc;
-
- rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- rf_clear_debug_print_buffer();
- return (0);
-}
-
-void
-rf_clear_debug_print_buffer()
-{
- int i;
-
- for (i = 0; i < BUFSIZE; i++)
- rf_debugprint_buf[i].cstring = NULL;
- rf_debugprint_index = 0;
-}
-
-void
-rf_debug_printf(s, a1, a2, a3, a4, a5, a6, a7, a8)
- char *s;
- void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8;
-{
- int idx;
-
- if (rf_debugPrintUseBuffer) {
-
- RF_LOCK_MUTEX(rf_debug_print_mutex);
- idx = rf_debugprint_index;
- rf_debugprint_index = (rf_debugprint_index + 1) & BUFMASK;
- RF_UNLOCK_MUTEX(rf_debug_print_mutex);
-
- rf_debugprint_buf[idx].cstring = s;
- rf_debugprint_buf[idx].a1 = a1;
- rf_debugprint_buf[idx].a2 = a2;
- rf_debugprint_buf[idx].a3 = a3;
- rf_debugprint_buf[idx].a4 = a4;
- rf_debugprint_buf[idx].a5 = a5;
- rf_debugprint_buf[idx].a6 = a6;
- rf_debugprint_buf[idx].a7 = a7;
- rf_debugprint_buf[idx].a8 = a8;
- } else {
- printf(s, a1, a2, a3, a4, a5, a6, a7, a8);
- }
-}
-
-void
-rf_print_debug_buffer()
-{
- rf_spill_debug_buffer(NULL);
-}
-
-void
-rf_spill_debug_buffer(fname)
- char *fname;
-{
- int i;
-
- if (!rf_debugPrintUseBuffer)
- return;
-
- RF_LOCK_MUTEX(rf_debug_print_mutex);
-
- for (i = rf_debugprint_index + 1; i != rf_debugprint_index; i = (i + 1) & BUFMASK)
- if (rf_debugprint_buf[i].cstring)
- printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3,
- rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8);
- printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3,
- rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8);
- RF_UNLOCK_MUTEX(rf_debug_print_mutex);
-}
diff --git a/sys/dev/raidframe/rf_debugprint.h b/sys/dev/raidframe/rf_debugprint.h
deleted file mode 100644
index 318f620..0000000
--- a/sys/dev/raidframe/rf_debugprint.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_debugprint.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */
-/*
- * rf_debugprint.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_DEBUGPRINT_H_
-#define _RF__RF_DEBUGPRINT_H_
-
-int rf_ConfigureDebugPrint(RF_ShutdownList_t ** listp);
-void rf_clear_debug_print_buffer(void);
-void
-rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4,
- void *a5, void *a6, void *a7, void *a8);
-void rf_print_debug_buffer(void);
-void rf_spill_debug_buffer(char *fname);
-
-#endif /* !_RF__RF_DEBUGPRINT_H_ */
diff --git a/sys/dev/raidframe/rf_decluster.c b/sys/dev/raidframe/rf_decluster.c
deleted file mode 100644
index 646a5ad..0000000
--- a/sys/dev/raidframe/rf_decluster.c
+++ /dev/null
@@ -1,747 +0,0 @@
-/* $NetBSD: rf_decluster.c,v 1.6 2001/01/26 04:40:03 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*----------------------------------------------------------------------
- *
- * rf_decluster.c -- code related to the declustered layout
- *
- * Created 10-21-92 (MCH)
- *
- * Nov 93: adding support for distributed sparing. This code is a little
- * complex: the basic layout used is as follows:
- * let F = (v-1)/GCD(r,v-1). The spare space for each set of
- * F consecutive fulltables is grouped together and placed after
- * that set of tables.
- * +------------------------------+
- * | F fulltables |
- * | Spare Space |
- * | F fulltables |
- * | Spare Space |
- * | ... |
- * +------------------------------+
- *
- *--------------------------------------------------------------------*/
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_raidframe.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_decluster.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-
-extern int rf_copyback_in_progress; /* debug only */
-
-/* found in rf_kintf.c */
-extern int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req);
-
-#if (RF_INCLUDE_PARITY_DECLUSTERING > 0) || (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0)
-
-/* configuration code */
-
-int
-rf_ConfigureDeclustered(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- int b, v, k, r, lambda; /* block design params */
- int i, j;
- RF_RowCol_t *first_avail_slot;
- RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
- RF_DeclusteredConfigInfo_t *info;
- RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk,
- extraPUsPerDisk;
- RF_StripeCount_t totSparePUsPerDisk;
- RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
- RF_SectorCount_t SpareSpaceInSUs;
- char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
- RF_StripeNum_t l, SUID;
-
- SUID = l = 0;
- numCompleteSpareRegionsPerDisk = 0;
-
- /* 1. create layout specific structure */
- RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
- info->SpareTable = NULL;
-
- /* 2. extract parameters from the config structure */
- if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
- (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
- }
- cfgBuf += RF_SPAREMAP_NAME_LEN;
-
- b = *((int *) cfgBuf);
- cfgBuf += sizeof(int);
- v = *((int *) cfgBuf);
- cfgBuf += sizeof(int);
- k = *((int *) cfgBuf);
- cfgBuf += sizeof(int);
- r = *((int *) cfgBuf);
- cfgBuf += sizeof(int);
- lambda = *((int *) cfgBuf);
- cfgBuf += sizeof(int);
- raidPtr->noRotate = *((int *) cfgBuf);
- cfgBuf += sizeof(int);
-
- /* the sparemaps are generated assuming that parity is rotated, so we
- * issue a warning if both distributed sparing and no-rotate are on at
- * the same time */
- if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
- RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n");
- }
- if (raidPtr->numCol != v) {
- RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
- return (EINVAL);
- }
- /* 3. set up the values used in the mapping code */
- info->BlocksPerTable = b;
- info->Lambda = lambda;
- info->NumParityReps = info->groupSize = k;
- info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */
- info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */
- info->PUsPerBlock = k - 1;
- info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
- info->TableDepthInPUs = (b * k) / v;
- info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */
-
- /* used only in distributed sparing case */
- info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */
- info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
- info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU;
-
- /* check to make sure the block design is sufficiently small */
- if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
- if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
- RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
- (int) info->FullTableDepthInPUs,
- (int) info->SpareSpaceDepthPerRegionInSUs,
- (int) layoutPtr->stripeUnitsPerDisk);
- return (EINVAL);
- }
- } else {
- if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
- RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
- (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), \
- (int) layoutPtr->stripeUnitsPerDisk);
- return (EINVAL);
- }
- }
-
-
- /* compute the size of each disk, and the number of tables in the last
- * fulltable (which need not be complete) */
- if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
-
- PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
- spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
- (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1));
- info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;
-
- numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
- info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
- extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
-
- /* assume conservatively that we need the full amount of spare
- * space in one region in order to provide spares for the
- * partial spare region at the end of the array. We set "i"
- * to the number of tables in the partial spare region. This
- * may actually include some fulltables. */
- extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
- if (extraPUsPerDisk <= 0)
- i = 0;
- else
- i = extraPUsPerDisk / info->TableDepthInPUs;
-
- complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k);
- info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
- info->ExtraTablesPerDisk = i % k;
-
- /* note that in the last spare region, the spare space is
- * complete even though data/parity space is not */
- totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
- info->TotSparePUsPerDisk = totSparePUsPerDisk;
-
- layoutPtr->stripeUnitsPerDisk =
- ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */
- info->ExtraTablesPerDisk * info->TableDepthInPUs +
- totSparePUsPerDisk /* spare space */
- ) * layoutPtr->SUsPerPU;
- layoutPtr->dataStripeUnitsPerDisk =
- (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
- * layoutPtr->SUsPerPU * (k - 1) / k;
-
- } else {
- /* non-dist spare case: force each disk to contain an
- * integral number of tables */
- layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
- layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
-
- /* compute the number of tables in the last fulltable, which
- * need not be complete */
- complete_FT_count =
- ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;
-
- info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
- info->ExtraTablesPerDisk =
- ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
- }
-
- raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
-
- /* find the disk offset of the stripe unit where the last fulltable
- * starts */
- numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
- diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
- if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
- SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
- diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
- info->DiskOffsetOfLastSpareSpaceChunkInSUs =
- diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
- }
- info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
- info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
-
- /* 4. create and initialize the lookup tables */
- info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
- if (info->LayoutTable == NULL)
- return (ENOMEM);
- info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
- if (info->OffsetTable == NULL)
- return (ENOMEM);
- info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
- if (info->BlockTable == NULL)
- return (ENOMEM);
-
- first_avail_slot = rf_make_1d_array(v, NULL);
- if (first_avail_slot == NULL)
- return (ENOMEM);
-
- for (i = 0; i < b; i++)
- for (j = 0; j < k; j++)
- info->LayoutTable[i][j] = *cfgBuf++;
-
- /* initialize offset table */
- for (i = 0; i < b; i++)
- for (j = 0; j < k; j++) {
- info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]];
- first_avail_slot[info->LayoutTable[i][j]]++;
- }
-
- /* initialize block table */
- for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) {
- for (i = 0; i < b; i++) {
- for (j = 0; j < k; j++) {
- info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l]
- [info->LayoutTable[i][j]] = SUID;
- }
- SUID++;
- }
- }
-
- rf_free_1d_array(first_avail_slot, v);
-
- /* 5. set up the remaining redundant-but-useful parameters */
-
- raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) *
- info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 1);
-
- /* strange evaluation order below to try and minimize overflow
- * problems */
-
- layoutPtr->dataSectorsPerStripe = (k - 1) * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = k - 1;
- layoutPtr->numParityCol = 1;
-
- return (0);
-}
-/* declustering with distributed sparing */
-static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
-static void
-rf_ShutdownDeclusteredDS(arg)
- RF_ThreadArg_t arg;
-{
- RF_DeclusteredConfigInfo_t *info;
- RF_Raid_t *raidPtr;
-
- raidPtr = (RF_Raid_t *) arg;
- info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- if (info->SpareTable)
- rf_FreeSpareTable(raidPtr);
-}
-
-int
-rf_ConfigureDeclusteredDS(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- int rc;
-
- rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
- if (rc)
- return (rc);
- rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
- if (rc) {
- RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc);
- rf_ShutdownDeclusteredDS(raidPtr);
- return (rc);
- }
- return (0);
-}
-
-void
-rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap)
- RF_Raid_t *raidPtr;
- RF_RaidAddr_t raidSector;
- RF_RowCol_t *row;
- RF_RowCol_t *col;
- RF_SectorNum_t *diskSector;
- int remap;
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
- RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
- RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
- RF_StripeNum_t BlockID, BlockOffset, RepIndex;
- RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
- RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
- RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
-
- rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
-
- FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array
- * (across rows) */
- if (raidPtr->numRow == 1)
- *row = 0; /* avoid a mod and a div in the common case */
- else {
- *row = FullTableID % raidPtr->numRow;
- FullTableID /= raidPtr->numRow; /* convert to fulltable ID on
- * this disk */
- }
- if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
- SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
- SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
- }
- FullTableOffset = SUID % sus_per_fulltable;
- TableID = FullTableOffset / info->SUsPerTable;
- TableOffset = FullTableOffset - TableID * info->SUsPerTable;
- BlockID = TableOffset / info->PUsPerBlock;
- BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
- BlockID %= info->BlocksPerTable;
- RepIndex = info->PUsPerBlock - TableID;
- if (!raidPtr->noRotate)
- BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
- *col = info->LayoutTable[BlockID][BlockOffset];
-
- /* remap to distributed spare space if indicated */
- if (remap) {
- RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
- (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
- rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
- } else {
-
- outSU = base_suid;
- outSU += FullTableID * fulltable_depth; /* offs to strt of FT */
- outSU += SpareSpace; /* skip rsvd spare space */
- outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */
- outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */
- }
- outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within
- * a PU */
-
- /* convert SUs to sectors, and, if not aligned to SU boundary, add in
- * offset to sector. */
- *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
-
- RF_ASSERT(*col != -1);
-}
-
-
-/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail
- *
- * rf_MapParityDeclustered -- locate the parity unit for a RAID address.
- *
- * raidPtr    - array descriptor; Layout.layoutSpecificInfo is read as an
- *              RF_DeclusteredConfigInfo_t.
- * raidSector - RAID address (in sectors) being mapped.
- * row, col   - out: row and column of the disk holding the parity unit.
- * diskSector - out: sector offset of the parity unit on that disk.
- * remap      - if nonzero, the column and stripe-unit offset are taken from
- *              rf_remap_to_spare_space() instead of the layout/offset tables.
- *
- * The arithmetic follows the data-unit mapping, except that the layout and
- * offset tables are indexed by RepIndex (the parity position within the
- * block) rather than by BlockOffset.
- */
-void
-rf_MapParityDeclustered(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
- RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
- RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
- RF_StripeNum_t BlockID, BlockOffset, RepIndex;
- RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
- RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
- RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
-
- rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); /* rebases SUID and sizes when the address is at or past FullTableLimitSUID */
-
- /* compute row & (possibly) spare space exactly as before */
- FullTableID = SUID / sus_per_fulltable;
- if (raidPtr->numRow == 1)
- *row = 0; /* avoid a mod and a div in the common case */
- else {
- *row = FullTableID % raidPtr->numRow;
- FullTableID /= raidPtr->numRow; /* convert to fulltable ID on
- * this disk */
- }
- if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
- SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
- SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
- }
- /* compute BlockID and RepIndex exactly as before */
- FullTableOffset = SUID % sus_per_fulltable;
- TableID = FullTableOffset / info->SUsPerTable;
- TableOffset = FullTableOffset - TableID * info->SUsPerTable;
- /* TableOffset = FullTableOffset % info->SUsPerTable; */
- /* BlockID = (TableOffset / info->PUsPerBlock) %
- * info->BlocksPerTable; */
- BlockID = TableOffset / info->PUsPerBlock;
- /* BlockOffset = TableOffset % info->PUsPerBlock; */
- BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
- BlockID %= info->BlocksPerTable;
-
- /* the parity block is in the position indicated by RepIndex */
- RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
- *col = info->LayoutTable[BlockID][RepIndex];
-
- if (remap) {
- RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
- (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
- rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); /* base_suid is passed only as a 0/1 flag; col and outSU are overwritten */
- } else {
-
- /* compute sector as before, except use RepIndex instead of
- * BlockOffset */
- outSU = base_suid;
- outSU += FullTableID * fulltable_depth;
- outSU += SpareSpace; /* skip rsvd spare space */
- outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
- outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
- }
-
- outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* applied on both the remapped and the normal path */
- *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); /* SUs -> sectors, plus the offset within the stripe unit */
-
- RF_ASSERT(*col != -1);
-}
-/*
- * Report which disks make up the stripe containing the indicated address.
- *
- * On return *diskids points at the row of the block design table that lists
- * the stripe's disks, and *outRow holds the row the stripe lives in.  The
- * array belongs to the layout: the caller must _never_ attempt to modify it.
- */
-void
-rf_IdentifyStripeDeclustered(
-	RF_Raid_t * raidPtr,
-	RF_RaidAddr_t addr,
-	RF_RowCol_t ** diskids,
-	RF_RowCol_t * outRow)
-{
-	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-	RF_DeclusteredConfigInfo_t *info;
-	RF_StripeCount_t ftSize, ftDepth;
-	RF_StripeNum_t suid, ftBase, ftID, sid;
-	int designRow;
-
-	info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
-	ftSize = info->SUsPerFullTable;
-	ftDepth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
-	ftBase = 0;
-	suid = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
-
-	/* switch to the partial-fulltable parameters when addr lies past the
-	 * complete fulltables */
-	rf_decluster_adjust_params(layoutPtr, &suid, &ftSize, &ftDepth, &ftBase);
-
-	/* fulltable ID within the array (across rows) selects the row */
-	ftID = suid / ftSize;
-	*outRow = ftID % raidPtr->numRow;
-
-	/* the stripe's position within the block design picks the table row */
-	sid = rf_StripeUnitIDToStripeID(layoutPtr, suid);
-	designRow = (sid % info->BlocksPerTable);
-	*diskids = info->LayoutTable[designRow];
-}
-/* This returns the default head-separation limit, which is measured
- * in "required units for reconstruction". Each time a disk fetches
- * a unit, it bumps a counter. The head-sep code prohibits any disk
- * from getting more than headSepLimit counter values ahead of any
- * other.
- *
- * We assume here that the number of floating recon buffers is already
- * set. There are r stripes to be reconstructed in each table, and so
- * if we have a total of B buffers, we can have at most B/r tables
- * under recon at any one time. In each table, lambda units are required
- * from each disk, so given B buffers, the head sep limit has to be
- * (lambda*B)/r units. The design calls for subtracting one to avoid weird
- * boundary cases; note that the code below does not actually subtract one.
- *
- * For example, suppose we're given 50 buffers, r=19, and lambda=4 as in
- * the 20.5 design. There are 19 stripes/table to be reconstructed, so
- * we can have 50/19 tables concurrently under reconstruction, which means
- * we can allow the fastest disk to get 50/19 tables ahead of the slower
- * disk. There are lambda "required units" for each disk, so the fastest
- * disk can get 4*50/19 = 10 counter values ahead of the slowest.
- *
- * If numBufsToAccumulate is not 1, we need to limit the head sep further
- * because multiple bufs will be required for each stripe under recon.
- */
-RF_HeadSepLimit_t
-rf_GetDefaultHeadSepLimitDeclustered(
-	RF_Raid_t * raidPtr)
-{
-	RF_DeclusteredConfigInfo_t *cfg;
-
-	cfg = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-
-	/*
-	 * lambda * (floating recon buffers) / (table depth in PUs), scaled
-	 * down by the number of buffers accumulated per stripe.  Kept as a
-	 * single expression so the integer promotions and truncating
-	 * divisions happen in the same order as always.
-	 */
-	return (cfg->Lambda * raidPtr->numFloatingReconBufs / cfg->TableDepthInPUs / rf_numBufsToAccumulate);
-}
-/*
- * Default number of floating reconstruction buffers for declustered layouts.
- *
- * The figure is somewhat arbitrary: large enough to permit a reasonably
- * large head-separation limit, small enough not to consume all of system
- * memory.  It scales with rf_numBufsToAccumulate; raidPtr is not consulted.
- */
-int
-rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr)
-{
-	int nbufs;
-
-	nbufs = 100 * rf_numBufsToAccumulate;
-	return (nbufs);
-}
-/*
- * Sectors in the last fulltable of the array need special handling because
- * that fulltable may be incomplete.
- *
- * MapSector et al. work out which disk a unit lives on by taking the unit
- * number modulo the number of units per fulltable, per table, and so on.
- * The last fulltable holds fewer units, so for addresses at or beyond
- * info->FullTableLimitSUID this routine:
- *
- *   (1) shrinks *sus_per_fulltable and *fulltable_depth to the size of the
- *       partial fulltable at the end,
- *   (2) sets *base_suid to the disk offset where that fulltable starts, and
- *   (3) rebases the user's *SUID from an offset into the array to an offset
- *       into the last fulltable.
- *
- * For any other address all four in/out parameters are left untouched.
- */
-void
-rf_decluster_adjust_params(
-	RF_RaidLayout_t * layoutPtr,
-	RF_StripeNum_t * SUID,
-	RF_StripeCount_t * sus_per_fulltable,
-	RF_StripeCount_t * fulltable_depth,
-	RF_StripeNum_t * base_suid)
-{
-	RF_DeclusteredConfigInfo_t *cfg;
-
-	cfg = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
-
-	/* addresses inside the complete fulltables need no adjustment */
-	if (*SUID < cfg->FullTableLimitSUID)
-		return;
-
-	/* user's array address becomes an offset into the last fulltable */
-	*SUID -= cfg->FullTableLimitSUID;
-
-	/* the last fulltable starts here on each disk */
-	*base_suid = cfg->DiskOffsetOfLastFullTableInSUs;
-
-	/* size and depth of the (partial) last fulltable */
-	*sus_per_fulltable = cfg->ExtraTablesPerDisk * cfg->SUsPerTable;
-	*fulltable_depth = cfg->ExtraTablesPerDisk * cfg->TableDepthInPUs * layoutPtr->SUsPerPU;
-}
-/*
- * Translate a stripe ID into a parity stripe ID (*psID) and the index of the
- * reconstruction unit within that parity stripe (*which_ru).
- * See comment above RaidAddressToParityStripeID in layout.c.
- */
-void
-rf_MapSIDToPSIDDeclustered(
-	RF_RaidLayout_t * layoutPtr,
-	RF_StripeNum_t stripeID,
-	RF_StripeNum_t * psID,
-	RF_ReconUnitNum_t * which_ru)
-{
-	RF_DeclusteredConfigInfo_t *cfg = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
-	RF_StripeCount_t b = cfg->BlocksPerTable;	/* block design tuples per table */
-
-	/* BlocksPerTable parity stripes per (SUsPerPU * BlocksPerTable) stripes,
-	 * plus the stripe's position within its table */
-	*psID = (stripeID / (layoutPtr->SUsPerPU * b)) * b + (stripeID % b);
-
-	/* which pass over the table the stripe falls in */
-	*which_ru = (stripeID % (b * layoutPtr->SUsPerPU)) / b;
-
-	RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU);
-}
-/*
- * Called from MapSector and MapParity to retarget an access at the spare unit.
- * Modifies the "col" and "outSU" parameters only.
- *
- * layoutPtr, info - layout and declustering parameters of the array.
- * row             - not referenced by this function.
- * FullTableID     - per-disk fulltable ID computed by the caller.
- * TableID/BlockID - table within the fulltable and block within the table;
- *                   with the fulltable ID they select the SpareTable entry.
- * base_suid       - zero when the address lies in a complete fulltable,
- *                   nonzero when the caller's parameters were adjusted for
- *                   the trailing partial fulltable.
- * SpareRegion     - spare region index computed by the caller; replaced by
- *                   info->NumCompleteSRs when base_suid is nonzero.
- * outCol          - out: disk column holding the spare unit.
- * outSU           - out: stripe-unit offset of the spare unit on that disk.
- *
- * An out-of-range *outSU is only reported with printf; the value is still
- * handed back to the caller.
- */
-void
-rf_remap_to_spare_space(
- RF_RaidLayout_t * layoutPtr,
- RF_DeclusteredConfigInfo_t * info,
- RF_RowCol_t row,
- RF_StripeNum_t FullTableID,
- RF_StripeNum_t TableID,
- RF_SectorNum_t BlockID,
- RF_StripeNum_t base_suid,
- RF_StripeNum_t SpareRegion,
- RF_RowCol_t * outCol,
- RF_StripeNum_t * outSU)
-{
- RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset,
- which_ft;
-
- /*
- * note that FullTableID and hence SpareRegion may have gotten
- * tweaked by rf_decluster_adjust_params. We detect this by
- * noticing that base_suid is not 0.
- */
- if (base_suid == 0) {
- ftID = FullTableID;
- } else {
- /*
- * There may be > 1.0 full tables in the last (i.e. partial)
- * spare region. find out which of these we're in.
- */
- lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs;
- which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
-
- /* compute the actual full table ID */
- ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft;
- SpareRegion = info->NumCompleteSRs;
- }
- TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion; /* index of this table within its spare region */
-
- *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
- RF_ASSERT(*outCol != -1);
-
- spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
- info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU :
- (SpareRegion + 1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs; /* last region: spare space follows the partial fulltable; otherwise it is the tail of the region */
- *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
- if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
- printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n", (long) *outSU);
- }
-}
-
-#endif /* (RF_INCLUDE_PARITY_DECLUSTERING > 0) || (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) */
-
-
-/*
- * Build a spare-table request describing this array's declustered layout and
- * the failed column, and hand it to rf_GetSpareTableFromDaemon().
- *
- * raidPtr - array whose spare table is being requested.
- * frow    - accepted for interface compatibility; it is not placed in the
- *           request.
- * fcol    - failed column, recorded in the request.
- *
- * Returns ENOMEM if the request cannot be allocated, otherwise the return
- * code of rf_GetSpareTableFromDaemon() (asserted to be zero).
- */
-int
-rf_InstallSpareTable(
-	RF_Raid_t * raidPtr,
-	RF_RowCol_t frow,
-	RF_RowCol_t fcol)
-{
-	RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-	RF_SparetWait_t *req;
-	int retcode;
-
-	RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
-	/* do not fill in a request we failed to allocate */
-	if (req == NULL)
-		return (ENOMEM);
-
-	req->C = raidPtr->numCol;
-	req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
-	req->fcol = fcol;
-	req->SUsPerPU = raidPtr->Layout.SUsPerPU;
-	req->TablesPerSpareRegion = info->TablesPerSpareRegion;
-	req->BlocksPerTable = info->BlocksPerTable;
-	req->TableDepthInPUs = info->TableDepthInPUs;
-	req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs;
-
-	/* NOTE(review): req is not freed here; presumably the daemon path
-	 * takes ownership of it -- confirm against rf_GetSpareTableFromDaemon */
-	retcode = rf_GetSpareTableFromDaemon(req);
-	RF_ASSERT(!retcode);	/* XXX -- fix this to recover gracefully --
-				 * XXX */
-	return (retcode);
-}
-/*
- * Invoked via ioctl to install a spare table in the kernel.
- *
- * raidPtr - array whose declustered layout receives the table.
- * data    - user-space address of an array of info->TablesPerSpareRegion
- *           pointers, each referring to a user-space row of
- *           info->BlocksPerTable RF_SpareTableEntry_t entries.
- *
- * Returns 0 on success, ENOMEM if kernel memory cannot be allocated, or the
- * error returned by copyin().  Every allocation made here is released on
- * failure.  As before, a failure while copying in the rows leaves
- * info->SpareTable set to NULL, and a failure before that point leaves
- * info->SpareTable untouched.
- *
- * NOTE(review): a spare table that is already installed is overwritten
- * without being freed, as in the original code.
- */
-int
-rf_SetSpareTable(RF_Raid_t * raidPtr, void *data)
-{
-	RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-	RF_SpareTableEntry_t **ptrs, **table;
-	int i, retcode;
-
-	/* what we need to copyin is a 2-d array, so first copyin the user
-	 * pointers to the rows in the table */
-	RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
-	if (ptrs == NULL)
-		return (ENOMEM);
-	retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
-	if (retcode) {
-		RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
-		return (retcode);
-	}
-
-	/* now allocate kernel space for the row pointers */
-	RF_Malloc(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
-	if (table == NULL) {
-		RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
-		return (ENOMEM);
-	}
-
-	/* now allocate kernel space for each row in the table, and copy it in
-	 * from user space */
-	for (i = 0; i < info->TablesPerSpareRegion; i++) {
-		RF_Malloc(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
-		if (table[i] == NULL) {
-			retcode = ENOMEM;
-			break;
-		}
-		retcode = copyin(ptrs[i], table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
-		if (retcode) {
-			/* row i holds nothing useful; drop it before unwinding */
-			RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
-			break;
-		}
-	}
-
-	if (retcode) {
-		/* release rows 0 .. i-1 and the row-pointer array instead of
-		 * abandoning them */
-		while (--i >= 0)
-			RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
-		RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
-		table = NULL;
-	}
-	info->SpareTable = table;
-
-	/* free up the temporary array we used */
-	RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
-
-	return (retcode);
-}
-
-/*
- * Return the total number of spare parity units per disk recorded in the
- * declustered layout info (TotSparePUsPerDisk).
- */
-RF_ReconUnitCount_t
-rf_GetNumSpareRUsDeclustered(raidPtr)
-	RF_Raid_t *raidPtr;
-{
-	RF_DeclusteredConfigInfo_t *info;
-
-	info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-	return (info->TotSparePUsPerDisk);
-}
-
-/*
- * Release the spare table installed by rf_SetSpareTable(): every row, then
- * the array of row pointers, and finally clear info->SpareTable.
- *
- * Does nothing when no table is installed.  rf_SetSpareTable() leaves
- * info->SpareTable NULL after a failed row copy, so that state is reachable
- * and must not be dereferenced.
- */
-void
-rf_FreeSpareTable(RF_Raid_t * raidPtr)
-{
-	long i;
-	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
-	RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
-	RF_SpareTableEntry_t **table = info->SpareTable;
-
-	if (table == NULL)
-		return;
-
-	for (i = 0; i < info->TablesPerSpareRegion; i++) {
-		RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
-	}
-	RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
-	info->SpareTable = (RF_SpareTableEntry_t **) NULL;
-}
diff --git a/sys/dev/raidframe/rf_decluster.h b/sys/dev/raidframe/rf_decluster.h
deleted file mode 100644
index a630298..0000000
--- a/sys/dev/raidframe/rf_decluster.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_decluster.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*----------------------------------------------------------------------
- *
- * decluster.h -- header file for declustered layout code
- *
- * Adapted from raidSim version July 1994
- * Created 10-21-92 (MCH)
- *
- *--------------------------------------------------------------------*/
-
-#ifndef _RF__RF_DECLUSTER_H_
-#define _RF__RF_DECLUSTER_H_
-
-#include <dev/raidframe/rf_types.h>
-
-/*
- * These structures define the tables used to locate the spare unit
- * associated with a particular data or parity unit, and to perform
- * the associated inverse mapping.
- *
- * One entry exists per (table in spare region, block) pair; the spare table
- * is indexed as SpareTable[table][block].  Both fields are unsigned.
- */
-struct RF_SpareTableEntry_s {
- u_int spareDisk; /* disk to which this block is spared */
- u_int spareBlockOffsetInSUs; /* offset into spare table for that
- * disk */
-};
-#define RF_SPAREMAP_NAME_LEN 128
-
-/* this is the layout-specific info structure for the declustered layout.
- * The configuration code stores a pointer to it in the layout's
- * layoutSpecificInfo field, and the mapping routines cast that field back
- * to this type.
- */
-struct RF_DeclusteredConfigInfo_s {
- RF_StripeCount_t groupSize; /* no. of stripe units per parity
- * stripe */
- RF_RowCol_t **LayoutTable; /* the block design table */
- RF_RowCol_t **OffsetTable; /* the sector offset table */
- RF_RowCol_t **BlockTable; /* the block membership table */
- RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */
- RF_StripeCount_t SUsPerTable; /* stripe units per table */
- RF_StripeCount_t PUsPerBlock; /* parity units per block */
- RF_StripeCount_t SUsPerBlock; /* stripe units per block */
- RF_StripeCount_t BlocksPerTable; /* block design tuples per
- * table */
- RF_StripeCount_t NumParityReps; /* tables per full table */
- RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */
- RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1
- * fulltable */
- RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables
- * start */
- RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last
- * fulltable */
- RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial
- * ft, if any */
- RF_StripeCount_t numCompleteFullTablesPerDisk; /* ft identifier of
- * partial ft, if any */
- u_int Lambda; /* the pair count in the block design */
-
- /* these are used only in the distributed-sparing case */
- RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising
- * 1 spare region */
- RF_StripeCount_t TablesPerSpareRegion; /* # of tables */
- RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare
- * space/disk/region */
- RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */
- RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space
- * after partial ft */
- RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs
- * per disk */
- RF_StripeCount_t NumCompleteSRs; /* # of complete spare regions per disk */
- RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */
- char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find
- * sparemap. not used in
- * kernel */
-};
-
-int
-rf_ConfigureDeclustered(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int
-rf_ConfigureDeclusteredDS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-
-void
-rf_MapSectorDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-int rf_InstallSpareTable(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol);
-void rf_FreeSpareTable(RF_Raid_t * raidPtr);
-
-RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t * raidPtr);
-int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr);
-
-void
-rf_decluster_adjust_params(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t * SUID, RF_StripeCount_t * sus_per_fulltable,
- RF_StripeCount_t * fulltable_depth, RF_StripeNum_t * base_suid);
-void
-rf_remap_to_spare_space(
- RF_RaidLayout_t * layoutPtr,
- RF_DeclusteredConfigInfo_t * info, RF_RowCol_t row, RF_StripeNum_t FullTableID,
- RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid,
- RF_StripeNum_t SpareRegion, RF_RowCol_t * outCol, RF_StripeNum_t * outSU);
-int rf_SetSpareTable(RF_Raid_t * raidPtr, void *data);
-RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t * raidPtr);
-
-#endif /* !_RF__RF_DECLUSTER_H_ */
diff --git a/sys/dev/raidframe/rf_declusterPQ.c b/sys/dev/raidframe/rf_declusterPQ.c
deleted file mode 100644
index dc539a3..0000000
--- a/sys/dev/raidframe/rf_declusterPQ.c
+++ /dev/null
@@ -1,493 +0,0 @@
-/* $NetBSD: rf_declusterPQ.c,v 1.5 2001/01/26 14:06:17 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Daniel Stodolsky, Mark Holland, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*--------------------------------------------------
- * rf_declusterPQ.c
- *
- * mapping code for declustered P & Q or declustered EvenOdd
- * much code borrowed from rf_decluster.c
- *
- *--------------------------------------------------*/
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_decluster.h>
-#include <dev/raidframe/rf_declusterPQ.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_general.h>
-
-#if (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) || (RF_INCLUDE_EVENODD > 0)
-/* configuration code */
-
-/*
- * Configure a declustered P+Q layout.
- *
- * Reads the block design parameters (b, v, k, r, lambda, noRotate) and the
- * b x k layout table from cfgPtr->layoutSpecific, after skipping the leading
- * RF_SPAREMAP_NAME_LEN bytes, then fills in the layout-specific info
- * structure, the layout/offset/block lookup tables and the derived size
- * fields of raidPtr and raidPtr->Layout.
- *
- * listp   - not referenced; allocations go on raidPtr->cleanupList.
- * raidPtr - array being configured.
- * cfgPtr  - configuration whose layoutSpecific buffer carries the design.
- *
- * Returns 0 on success, EINVAL if the block design cannot be used for this
- * array, or ENOMEM if an allocation fails.
- *
- * The layout table comes from the configuration buffer, so each entry is
- * range-checked before it is used as an array index.
- */
-int
-rf_ConfigureDeclusteredPQ(
-	RF_ShutdownList_t ** listp,
-	RF_Raid_t * raidPtr,
-	RF_Config_t * cfgPtr)
-{
-	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-	int b, v, k, r, lambda;	/* block design params */
-	int i, j, l;
-	int *first_avail_slot;
-	int complete_FT_count, SUID;
-	RF_DeclusteredConfigInfo_t *info;
-	int numCompleteFullTablesPerDisk;
-	int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0,
-	    extraPUsPerDisk;
-	int totSparePUsPerDisk;
-	int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs;
-	char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
-
-	/* presumably the sparemap file name leads the buffer; it is skipped */
-	cfgBuf += RF_SPAREMAP_NAME_LEN;
-
-	b = *((int *) cfgBuf);
-	cfgBuf += sizeof(int);
-	v = *((int *) cfgBuf);
-	cfgBuf += sizeof(int);
-	k = *((int *) cfgBuf);
-	cfgBuf += sizeof(int);
-	r = *((int *) cfgBuf);
-	cfgBuf += sizeof(int);
-	lambda = *((int *) cfgBuf);
-	cfgBuf += sizeof(int);
-	raidPtr->noRotate = *((int *) cfgBuf);
-	cfgBuf += sizeof(int);
-
-	/* P and Q take two units of every block, so k == 2 leaves no data */
-	if (k <= 2) {
-		printf("RAIDFRAME: k=%d, minimum value 3\n", k);
-		return (EINVAL);
-	}
-	/* 1. create layout specific structure */
-	RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
-	if (info == NULL)
-		return (ENOMEM);
-	layoutPtr->layoutSpecificInfo = (void *) info;
-
-	/* the sparemaps are generated assuming that parity is rotated, so we
-	 * issue a warning if both distributed sparing and no-rotate are on at
-	 * the same time */
-	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
-		RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n");
-	}
-	if (raidPtr->numCol != v) {
-		RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
-		return (EINVAL);
-	}
-	/* 3. set up the values used in devRaidMap */
-	/* NOTE(review): lambda is parsed above but info->Lambda is never set
-	 * in this function -- confirm whether that is intended */
-	info->BlocksPerTable = b;
-	info->NumParityReps = info->groupSize = k;
-	info->PUsPerBlock = k - 2;	/* PQ */
-	info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU;	/* b blks, k-2 PUs each */
-	info->SUsPerFullTable = k * info->SUsPerTable;	/* rot k times */
-	info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
-	info->TableDepthInPUs = (b * k) / v;
-	info->FullTableDepthInPUs = info->TableDepthInPUs * k;	/* k repetitions */
-
-	/* used only in distributed sparing case */
-	info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1);	/* (v-1)/gcd fulltables */
-	info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
-	info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU;
-
-	/* check to make sure the block design is sufficiently small */
-	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
-		if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
-			RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
-			    (int) info->FullTableDepthInPUs,
-			    (int) info->SpareSpaceDepthPerRegionInSUs,
-			    (int) layoutPtr->stripeUnitsPerDisk);
-			return (EINVAL);
-		}
-	} else {
-		if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
-			RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
-			    (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU),
-			    (int) layoutPtr->stripeUnitsPerDisk);
-			return (EINVAL);
-		}
-	}
-
-
-	/* compute the size of each disk, and the number of tables in the last
-	 * fulltable (which need not be complete) */
-	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
-
-		PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
-		spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
-		    (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1));
-		info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;
-
-		numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
-		info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
-		extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
-
-		/* assume conservatively that we need the full amount of spare
-		 * space in one region in order to provide spares for the
-		 * partial spare region at the end of the array.  We set "i"
-		 * to the number of tables in the partial spare region.  This
-		 * may actually include some fulltables. */
-		extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
-		if (extraPUsPerDisk <= 0)
-			i = 0;
-		else
-			i = extraPUsPerDisk / info->TableDepthInPUs;
-
-		complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k);
-		info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
-		info->ExtraTablesPerDisk = i % k;
-
-		/* note that in the last spare region, the spare space is
-		 * complete even though data/parity space is not */
-		totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
-		info->TotSparePUsPerDisk = totSparePUsPerDisk;
-
-		layoutPtr->stripeUnitsPerDisk =
-		    ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs +	/* data & parity space */
-		    info->ExtraTablesPerDisk * info->TableDepthInPUs +
-		    totSparePUsPerDisk	/* spare space */
-		    ) * layoutPtr->SUsPerPU;
-		/* NOTE(review): the (k - 1) / k factor matches single-parity
-		 * declustering; with two parity units per block (k - 2) / k
-		 * might be expected -- confirm before changing */
-		layoutPtr->dataStripeUnitsPerDisk =
-		    (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
-		    * layoutPtr->SUsPerPU * (k - 1) / k;
-
-	} else {
-		/* non-dist spare case:  force each disk to contain an
-		 * integral number of tables */
-		layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
-		layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
-
-		/* compute the number of tables in the last fulltable, which
-		 * need not be complete */
-		complete_FT_count =
-		    ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;
-
-		info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
-		info->ExtraTablesPerDisk =
-		    ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
-	}
-
-	raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
-
-	/* find the disk offset of the stripe unit where the last fulltable
-	 * starts */
-	numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
-	diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
-	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
-		SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
-		diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
-		info->DiskOffsetOfLastSpareSpaceChunkInSUs =
-		    diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
-	}
-	info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
-	info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
-
-	/* 4. create and initialize the lookup tables */
-	info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
-	if (info->LayoutTable == NULL)
-		return (ENOMEM);
-	info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
-	if (info->OffsetTable == NULL)
-		return (ENOMEM);
-	info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
-	if (info->BlockTable == NULL)
-		return (ENOMEM);
-
-	first_avail_slot = (int *) rf_make_1d_array(v, NULL);
-	if (first_avail_slot == NULL)
-		return (ENOMEM);
-
-	/* read the block design; every entry names a column and is used below
-	 * to index first_avail_slot[] (v entries) and BlockTable[][] (numCol
-	 * columns), so reject anything outside 0 .. v-1 */
-	for (i = 0; i < b; i++)
-		for (j = 0; j < k; j++) {
-			info->LayoutTable[i][j] = *cfgBuf++;
-			if (info->LayoutTable[i][j] < 0 || info->LayoutTable[i][j] >= v) {
-				RF_ERRORMSG2("RAID: config error: layout table entry (%d) out of range for %d cols\n",
-				    (int) info->LayoutTable[i][j], v);
-				rf_free_1d_array(first_avail_slot, v);
-				return (EINVAL);
-			}
-		}
-
-	/* initialize offset table */
-	for (i = 0; i < b; i++)
-		for (j = 0; j < k; j++) {
-			/* BlockTable has TableDepthInPUs * SUsPerPU rows, so a
-			 * column may appear at most TableDepthInPUs times */
-			if (first_avail_slot[info->LayoutTable[i][j]] >= info->TableDepthInPUs) {
-				RF_ERRORMSG2("RAID: config error: col %d appears more than %d times in layout table\n",
-				    (int) info->LayoutTable[i][j], (int) info->TableDepthInPUs);
-				rf_free_1d_array(first_avail_slot, v);
-				return (EINVAL);
-			}
-			info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]];
-			first_avail_slot[info->LayoutTable[i][j]]++;
-		}
-
-	/* initialize block table */
-	for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) {
-		for (i = 0; i < b; i++) {
-			for (j = 0; j < k; j++) {
-				info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l]
-				    [info->LayoutTable[i][j]] = SUID;
-			}
-			SUID++;
-		}
-	}
-
-	rf_free_1d_array(first_avail_slot, v);
-
-	/* 5. set up the remaining redundant-but-useful parameters */
-
-	raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) *
-	    info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
-	layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 2);
-
-	/* strange evaluation order below to try and minimize overflow
-	 * problems */
-
-	layoutPtr->dataSectorsPerStripe = (k - 2) * layoutPtr->sectorsPerStripeUnit;
-	layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
-	layoutPtr->numDataCol = k - 2;
-	layoutPtr->numParityCol = 2;
-
-	return (0);
-}
-
-/*
- * Default number of floating recon buffers for P+Q layouts: the larger of
- * the declustered default and three buffers per column.
- */
-int
-rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr)
-{
-	int declDefault = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr);
-
-	return (RF_MAX(declDefault, 3 * raidPtr->numCol));
-}
-
-/*
- * Map a RAID sector address to the disk location of its data unit in a
- * declustered P+Q layout.
- *
- * raidPtr    - array descriptor; Layout.layoutSpecificInfo is read as an
- *              RF_DeclusteredConfigInfo_t.
- * raidSector - RAID address (in sectors) being mapped.
- * row, col   - out: row and column of the disk holding the unit.
- * diskSector - out: sector offset of the unit on that disk.
- * remap      - if nonzero, the column and stripe-unit offset are taken from
- *              rf_remap_to_spare_space() instead of the layout/offset tables.
- *
- * *col is now assigned from the layout table whether or not parity rotation
- * is enabled.  Previously the assignment sat inside the !noRotate branch, so
- * with noRotate set the caller's *col was never written on the non-remap
- * path.
- */
-void
-rf_MapSectorDeclusteredPQ(
-	RF_Raid_t * raidPtr,
-	RF_RaidAddr_t raidSector,
-	RF_RowCol_t * row,
-	RF_RowCol_t * col,
-	RF_SectorNum_t * diskSector,
-	int remap)
-{
-	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-	RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
-	RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
-	RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
-	RF_StripeNum_t BlockID, BlockOffset, RepIndex;
-	RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
-	RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
-	RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
-
-	/* switch to the partial-fulltable parameters if the address lies past
-	 * the complete fulltables */
-	rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
-
-	FullTableID = SUID / sus_per_fulltable;	/* fulltable ID within array
-						 * (across rows) */
-	*row = FullTableID % raidPtr->numRow;
-	FullTableID /= raidPtr->numRow;	/* convert to fulltable ID on this
-					 * disk */
-	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
-		SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
-		SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
-	}
-	FullTableOffset = SUID % sus_per_fulltable;
-	TableID = FullTableOffset / info->SUsPerTable;
-	TableOffset = FullTableOffset - TableID * info->SUsPerTable;
-	BlockID = TableOffset / info->PUsPerBlock;
-	BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
-	BlockID %= info->BlocksPerTable;
-	RF_ASSERT(BlockOffset < info->groupSize - 2);
-	/*
-	   TableIDs go from 0 .. GroupSize-1 inclusive.
-	   PUsPerBlock is k-2.
-	   We want the tableIDs to rotate from the
-	   right, so use GroupSize
-	   */
-	RepIndex = info->groupSize - 1 - TableID;
-	RF_ASSERT(RepIndex >= 0);
-	if (!raidPtr->noRotate) {
-		if (TableID == 0)
-			BlockOffset++;	/* P on last drive, Q on first */
-		else
-			BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0);	/* skip over PQ */
-		RF_ASSERT(BlockOffset < info->groupSize);
-	}
-	/* without rotation BlockOffset is still below groupSize - 2 (asserted
-	 * above), so the lookup stays inside the k-wide layout table */
-	*col = info->LayoutTable[BlockID][BlockOffset];
-
-	/* remap to distributed spare space if indicated */
-	if (remap) {
-		rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
-	} else {
-
-		outSU = base_suid;
-		outSU += FullTableID * fulltable_depth;	/* offs to strt of FT */
-		outSU += SpareSpace;	/* skip rsvd spare space */
-		outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;	/* offs to strt of tble */
-		outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU;	/* offs to the PU */
-	}
-	outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);	/* offs to the SU within
-										 * a PU */
-
-	/* convert SUs to sectors, and, if not aligned to SU boundary, add in
-	 * offset to sector */
-	*diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
-}
-
-
-void
-rf_MapParityDeclusteredPQ(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
- RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
- RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
- RF_StripeNum_t BlockID, BlockOffset, RepIndex;
- RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
- RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
- RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0;
-
- rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
-
- /* compute row & (possibly) spare space exactly as before */
- FullTableID = SUID / sus_per_fulltable;
- *row = FullTableID % raidPtr->numRow;
- FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this
- * disk */
- if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
- SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
- SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
- }
- /* compute BlockID and RepIndex exactly as before */
- FullTableOffset = SUID % sus_per_fulltable;
- TableID = FullTableOffset / info->SUsPerTable;
- TableOffset = FullTableOffset - TableID * info->SUsPerTable;
- BlockID = TableOffset / info->PUsPerBlock;
- BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
- BlockID %= info->BlocksPerTable;
-
- /* the parity block is in the position indicated by RepIndex */
- RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID;
- *col = info->LayoutTable[BlockID][RepIndex];
-
- if (remap)
- RF_PANIC();
-
- /* compute sector as before, except use RepIndex instead of
- * BlockOffset */
- outSU = base_suid;
- outSU += FullTableID * fulltable_depth;
- outSU += SpareSpace; /* skip rsvd spare space */
- outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
- outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
- outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
-
- *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
-}
-
-void
-rf_MapQDeclusteredPQ(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
- RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
- RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
- RF_StripeNum_t BlockID, BlockOffset, RepIndex, RepIndexQ;
- RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
- RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
- RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0;
-
- rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
-
- /* compute row & (possibly) spare space exactly as before */
- FullTableID = SUID / sus_per_fulltable;
- *row = FullTableID % raidPtr->numRow;
- FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this
- * disk */
- if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
- SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
- SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
- }
- /* compute BlockID and RepIndex exactly as before */
- FullTableOffset = SUID % sus_per_fulltable;
- TableID = FullTableOffset / info->SUsPerTable;
- TableOffset = FullTableOffset - TableID * info->SUsPerTable;
- BlockID = TableOffset / info->PUsPerBlock;
- BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
- BlockID %= info->BlocksPerTable;
-
- /* the q block is in the position indicated by RepIndex */
- RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID;
- RepIndexQ = ((RepIndex == (info->groupSize - 1)) ? 0 : RepIndex + 1);
- *col = info->LayoutTable[BlockID][RepIndexQ];
-
- if (remap)
- RF_PANIC();
-
- /* compute sector as before, except use RepIndex instead of
- * BlockOffset */
- outSU = base_suid;
- outSU += FullTableID * fulltable_depth;
- outSU += SpareSpace; /* skip rsvd spare space */
- outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
- outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
-
- outSU += info->OffsetTable[BlockID][RepIndexQ] * layoutPtr->SUsPerPU;
- *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
-}
-/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
- * the caller must _never_ attempt to modify this array.
- */
-void
-rf_IdentifyStripeDeclusteredPQ(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
- RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
- RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
- RF_StripeNum_t base_suid = 0;
- RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
- RF_StripeNum_t stripeID, FullTableID;
- int tableOffset;
-
- rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
- FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array
- * (across rows) */
- *outRow = FullTableID % raidPtr->numRow;
- stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset
- * into array */
- tableOffset = (stripeID % info->BlocksPerTable); /* find offset into
- * block design table */
- *diskids = info->LayoutTable[tableOffset];
-}
-#endif /* (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) || (RF_INCLUDE_EVENODD > 0) */
diff --git a/sys/dev/raidframe/rf_declusterPQ.h b/sys/dev/raidframe/rf_declusterPQ.h
deleted file mode 100644
index 6edef0b..0000000
--- a/sys/dev/raidframe/rf_declusterPQ.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_declusterPQ.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky, Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_DECLUSTERPQ_H_
-#define _RF__RF_DECLUSTERPQ_H_
-
-#include <dev/raidframe/rf_types.h>
-
-int
-rf_ConfigureDeclusteredPQ(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr);
-void
-rf_MapSectorDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapQDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-
-#endif /* !_RF__RF_DECLUSTERPQ_H_ */
diff --git a/sys/dev/raidframe/rf_desc.h b/sys/dev/raidframe/rf_desc.h
deleted file mode 100644
index 8a6951b..0000000
--- a/sys/dev/raidframe/rf_desc.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_desc.h,v 1.5 2000/01/09 00:00:18 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_DESC_H_
-#define _RF__RF_DESC_H_
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_dag.h>
-
-struct RF_RaidReconDesc_s {
- RF_Raid_t *raidPtr; /* raid device descriptor */
- RF_RowCol_t row; /* row of failed disk */
- RF_RowCol_t col; /* col of failed disk */
- int state; /* how far along the reconstruction operation
- * has gotten */
- RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon
- * (not used in dist sparing) */
- int numDisksDone; /* the number of surviving disks that have
- * completed their work */
- RF_RowCol_t srow; /* row ID of the spare disk (not used in dist
- * sparing) */
- RF_RowCol_t scol; /* col ID of the spare disk (not used in dist
- * sparing) */
- /*
- * Prevent recon from hogging CPU
- */
- RF_Etimer_t recon_exec_timer;
- RF_uint64 reconExecTimerRunning;
- RF_uint64 reconExecTicks;
- RF_uint64 maxReconExecTicks;
-
-#if RF_RECON_STATS > 0
- RF_uint64 hsStallCount; /* head sep stall count */
- RF_uint64 numReconExecDelays;
- RF_uint64 numReconEventWaits;
-#endif /* RF_RECON_STATS > 0 */
- RF_RaidReconDesc_t *next;
-};
-
-struct RF_RaidAccessDesc_s {
- RF_Raid_t *raidPtr; /* raid device descriptor */
- RF_IoType_t type; /* read or write */
- RF_RaidAddr_t raidAddress; /* starting address in raid address
- * space */
- RF_SectorCount_t numBlocks; /* number of blocks (sectors) to
- * transfer */
- RF_StripeCount_t numStripes; /* number of stripes involved in
- * access */
- caddr_t bufPtr; /* pointer to data buffer */
- RF_RaidAccessFlags_t flags; /* flags controlling operation */
- int state; /* index into states telling how far along the
- * RAID operation has gotten */
- RF_AccessState_t *states; /* array of states to be run */
- int status; /* pass/fail status of the last operation */
- RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */
- RF_AccessStripeMapHeader_t *asmap; /* the asm for this I/O */
- void *bp; /* buf pointer for this RAID acc. ignored
- * outside the kernel */
- RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to
- * the caller after I/O completion */
- RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be
- * returned to the caller
- * after I/O completion */
- RF_AccTraceEntry_t tracerec; /* perf monitoring information for a
- * user access (not for dag stats) */
- void (*callbackFunc) (RF_CBParam_t); /* callback function for this
- * I/O */
- void *callbackArg; /* arg to give to callback func */
-
- RF_AllocListElem_t *cleanupList; /* memory to be freed at the
- * end of the access */
-
- RF_RaidAccessDesc_t *next;
- RF_RaidAccessDesc_t *head;
-
- int numPending;
-
- RF_DECLARE_MUTEX(mutex) /* these are used to implement
- * blocking I/O */
- RF_DECLARE_COND(cond)
- int async_flag;
-
- RF_Etimer_t timer; /* used for timing this access */
-};
-#endif /* !_RF__RF_DESC_H_ */
diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c
deleted file mode 100644
index c03e6cd..0000000
--- a/sys/dev/raidframe/rf_diskqueue.c
+++ /dev/null
@@ -1,593 +0,0 @@
-/* $NetBSD: rf_diskqueue.c,v 1.13 2000/03/04 04:22:34 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/****************************************************************************
- *
- * rf_diskqueue.c -- higher-level disk queue code
- *
- * the routines here are a generic wrapper around the actual queueing
- * routines. The code here implements thread scheduling, synchronization,
- * and locking ops (see below) on top of the lower-level queueing code.
- *
- * to support atomic RMW, we implement "locking operations". When a
- * locking op is dispatched to the lower levels of the driver, the
- * queue is locked, and no further I/Os are dispatched until the queue
- * receives & completes a corresponding "unlocking operation". This
- * code relies on the higher layers to guarantee that a locking op
- * will always be eventually followed by an unlocking op. The model
- * is that the higher layers are structured so locking and unlocking
- * ops occur in pairs, i.e. an unlocking op cannot be generated until
- * after a locking op reports completion. There is no good way to
- * check to see that an unlocking op "corresponds" to the op that
- * currently has the queue locked, so we make no such attempt. Since
- * by definition there can be only one locking op outstanding on a
- * disk, this should not be a problem.
- *
- * In the kernel, we allow multiple I/Os to be concurrently dispatched
- * to the disk driver. In order to support locking ops in this
- * environment, when we decide to do a locking op, we stop dispatching
- * new I/Os and wait until all dispatched I/Os have completed before
- * dispatching the locking op.
- *
- * Unfortunately, the code is different in the 3 different operating
- * states (user level, kernel, simulator). In the kernel, I/O is
- * non-blocking, and we have no disk threads to dispatch for us.
- * Therefore, we have to dispatch new I/Os to the scsi driver at the
- * time of enqueue, and also at the time of completion. At user
- * level, I/O is blocking, and so only the disk threads may dispatch
- * I/Os. Thus at user level, all we can do at enqueue time is enqueue
- * and wake up the disk thread to do the dispatch.
- *
- ****************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_shutdown.h>
-#include <dev/raidframe/rf_cvscan.h>
-#include <dev/raidframe/rf_sstf.h>
-#include <dev/raidframe/rf_fifo.h>
-#include <dev/raidframe/rf_kintf.h>
-
-static int init_dqd(RF_DiskQueueData_t *);
-static void clean_dqd(RF_DiskQueueData_t *);
-static void rf_ShutdownDiskQueueSystem(void *);
-
-#define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
-
-/*****************************************************************************
- *
- * the disk queue switch defines all the functions used in the
- * different queueing disciplines queue ID, init routine, enqueue
- * routine, dequeue routine
- *
- ****************************************************************************/
-
-static RF_DiskQueueSW_t diskqueuesw[] = {
- {"fifo", /* FIFO */
- rf_FifoCreate,
- rf_FifoEnqueue,
- rf_FifoDequeue,
- rf_FifoPeek,
- rf_FifoPromote},
-
- {"cvscan", /* cvscan */
- rf_CvscanCreate,
- rf_CvscanEnqueue,
- rf_CvscanDequeue,
- rf_CvscanPeek,
- rf_CvscanPromote},
-
- {"sstf", /* shortest seek time first */
- rf_SstfCreate,
- rf_SstfEnqueue,
- rf_SstfDequeue,
- rf_SstfPeek,
- rf_SstfPromote},
-
- {"scan", /* SCAN (two-way elevator) */
- rf_ScanCreate,
- rf_SstfEnqueue,
- rf_ScanDequeue,
- rf_ScanPeek,
- rf_SstfPromote},
-
- {"cscan", /* CSCAN (one-way elevator) */
- rf_CscanCreate,
- rf_SstfEnqueue,
- rf_CscanDequeue,
- rf_CscanPeek,
- rf_SstfPromote},
-
-};
-#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))
-
-static RF_FreeList_t *rf_dqd_freelist;
-
-#define RF_MAX_FREE_DQD 256
-#define RF_DQD_INC 16
-#define RF_DQD_INITIAL 64
-
-#if defined(__FreeBSD__) && __FreeBSD_version > 500005
-#include <sys/bio.h>
-#endif
-
-#include <sys/buf.h>
-
-static int
-init_dqd(dqd)
- RF_DiskQueueData_t *dqd;
-{
-
- dqd->bp = (RF_Buf_t) malloc(sizeof(*dqd->bp), M_RAIDFRAME, M_NOWAIT);
- if (dqd->bp == NULL) {
- return (ENOMEM);
- }
- memset(dqd->bp, 0, sizeof(*dqd->bp)); /* if you don't do it, nobody
- * else will.. */
- return (0);
-}
-
-static void
-clean_dqd(dqd)
- RF_DiskQueueData_t *dqd;
-{
- free(dqd->bp, M_RAIDFRAME);
-}
-/* configures a single disk queue */
-
-int
-rf_ConfigureDiskQueue(
- RF_Raid_t * raidPtr,
- RF_DiskQueue_t * diskqueue,
- RF_RowCol_t r, /* row & col -- debug only. BZZT not any
- * more... */
- RF_RowCol_t c,
- RF_DiskQueueSW_t * p,
- RF_SectorCount_t sectPerDisk,
- dev_t dev,
- int maxOutstanding,
- RF_ShutdownList_t ** listp,
- RF_AllocListElem_t * clList)
-{
- int rc;
-
- diskqueue->row = r;
- diskqueue->col = c;
- diskqueue->qPtr = p;
- diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp);
- diskqueue->dev = dev;
- diskqueue->numOutstanding = 0;
- diskqueue->queueLength = 0;
- diskqueue->maxOutstanding = maxOutstanding;
- diskqueue->curPriority = RF_IO_NORMAL_PRIORITY;
- diskqueue->nextLockingOp = NULL;
- diskqueue->unlockingOp = NULL;
- diskqueue->numWaiting = 0;
- diskqueue->flags = 0;
- diskqueue->raidPtr = raidPtr;
- diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c];
- rc = rf_create_managed_mutex(listp, &diskqueue->mutex);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- rc = rf_create_managed_cond(listp, &diskqueue->cond);
- if (rc) {
- RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- return (0);
-}
-
-static void
-rf_ShutdownDiskQueueSystem(ignored)
- void *ignored;
-{
- RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist, next, (RF_DiskQueueData_t *), clean_dqd);
-}
-
-int
-rf_ConfigureDiskQueueSystem(listp)
- RF_ShutdownList_t **listp;
-{
- int rc;
-
- RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD,
- RF_DQD_INC, sizeof(RF_DiskQueueData_t));
- if (rf_dqd_freelist == NULL)
- return (ENOMEM);
- rc = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
- __FILE__, __LINE__, rc);
- rf_ShutdownDiskQueueSystem(NULL);
- return (rc);
- }
- RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL, next,
- (RF_DiskQueueData_t *), init_dqd);
- return (0);
-}
-
-int
-rf_ConfigureDiskQueues(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_DiskQueue_t **diskQueues, *spareQueues;
- RF_DiskQueueSW_t *p;
- RF_RowCol_t r, c;
- int rc, i;
-
- raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs;
-
- for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) {
- if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) {
- p = &diskqueuesw[i];
- break;
- }
- }
- if (p == NULL) {
- RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType);
- p = &diskqueuesw[0];
- }
- raidPtr->qType = p;
- RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList);
- if (diskQueues == NULL) {
- return (ENOMEM);
- }
- raidPtr->Queues = diskQueues;
- for (r = 0; r < raidPtr->numRow; r++) {
- RF_CallocAndAdd(diskQueues[r], raidPtr->numCol +
- ((r == 0) ? RF_MAXSPARE : 0),
- sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *),
- raidPtr->cleanupList);
- if (diskQueues[r] == NULL)
- return (ENOMEM);
- for (c = 0; c < raidPtr->numCol; c++) {
- rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[r][c],
- r, c, p,
- raidPtr->sectorsPerDisk,
- raidPtr->Disks[r][c].dev,
- cfgPtr->maxOutstandingDiskReqs,
- listp, raidPtr->cleanupList);
- if (rc)
- return (rc);
- }
- }
-
- spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
- for (r = 0; r < raidPtr->numSpare; r++) {
- rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r],
- 0, raidPtr->numCol + r, p,
- raidPtr->sectorsPerDisk,
- raidPtr->Disks[0][raidPtr->numCol + r].dev,
- cfgPtr->maxOutstandingDiskReqs, listp,
- raidPtr->cleanupList);
- if (rc)
- return (rc);
- }
- return (0);
-}
-/* Enqueue a disk I/O
- *
- * Unfortunately, we have to do things differently in the different
- * environments (simulator, user-level, kernel).
- * At user level, all I/O is blocking, so we have 1 or more threads/disk
- * and the thread that enqueues is different from the thread that dequeues.
- * In the kernel, I/O is non-blocking and so we'd like to have multiple
- * I/Os outstanding on the physical disks when possible.
- *
- * when any request arrives at a queue, we have two choices:
- * dispatch it to the lower levels
- * queue it up
- *
- * kernel rules for when to do what:
- * locking request: queue empty => dispatch and lock queue,
- * else queue it
- * unlocking req : always dispatch it
- * normal req : queue empty => dispatch it & set priority
- * queue not full & priority is ok => dispatch it
- * else queue it
- *
- * user-level rules:
- * always enqueue. In the special case of an unlocking op, enqueue
- * in a special way that will cause the unlocking op to be the next
- * thing dequeued.
- *
- * simulator rules:
- * Do the same as at user level, with the sleeps and wakeups suppressed.
- */
-void
-rf_DiskIOEnqueue(queue, req, pri)
- RF_DiskQueue_t *queue;
- RF_DiskQueueData_t *req;
- int pri;
-{
- RF_ETIMER_START(req->qtime);
- RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector);
- req->priority = pri;
-
- if (rf_queueDebug && (req->numSector == 0)) {
- printf("Warning: Enqueueing zero-sector access\n");
- }
- /*
- * kernel
- */
- RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
- /* locking request */
- if (RF_LOCKING_REQ(req)) {
- if (RF_QUEUE_EMPTY(queue)) {
- Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n", pri, queue->row, queue->col);
- RF_LOCK_QUEUE(queue);
- rf_DispatchKernelIO(queue, req);
- } else {
- queue->queueLength++; /* increment count of number
- * of requests waiting in this
- * queue */
- Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n", pri, queue->row, queue->col);
- req->queue = (void *) queue;
- (queue->qPtr->Enqueue) (queue->qHdr, req, pri);
- }
- }
- /* unlocking request */
- else
- if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock
- * when this I/O completes */
- Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n", pri, queue->row, queue->col);
- RF_ASSERT(RF_QUEUE_LOCKED(queue));
- rf_DispatchKernelIO(queue, req);
- }
- /* normal request */
- else
- if (RF_OK_TO_DISPATCH(queue, req)) {
- Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n", pri, queue->row, queue->col);
- rf_DispatchKernelIO(queue, req);
- } else {
- queue->queueLength++; /* increment count of
- * number of requests
- * waiting in this queue */
- Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n", pri, queue->row, queue->col);
- req->queue = (void *) queue;
- (queue->qPtr->Enqueue) (queue->qHdr, req, pri);
- }
- RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
-}
-
-
-/* get the next set of I/Os started, kernel version only */
-void
-rf_DiskIOComplete(queue, req, status)
- RF_DiskQueue_t *queue;
- RF_DiskQueueData_t *req;
- int status;
-{
- int done = 0;
-
- RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
-
- /* unlock the queue: (1) after an unlocking req completes (2) after a
- * locking req fails */
- if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) {
- Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col);
- RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL));
- RF_UNLOCK_QUEUE(queue);
- }
- queue->numOutstanding--;
- RF_ASSERT(queue->numOutstanding >= 0);
-
- /* dispatch requests to the disk until we find one that we can't. */
- /* no reason to continue once we've filled up the queue */
- /* no reason to even start if the queue is locked */
-
- while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) {
- if (queue->nextLockingOp) {
- req = queue->nextLockingOp;
- queue->nextLockingOp = NULL;
- Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n", req->priority, queue->row, queue->col);
- } else {
- req = (queue->qPtr->Dequeue) (queue->qHdr);
- if (req != NULL) {
- Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n", req->priority, queue->row, queue->col);
- } else {
- Dprintf1("DiskIOComplete: no more requests to extract.\n", "");
- }
- }
- if (req) {
- queue->queueLength--; /* decrement count of number
- * of requests waiting in this
- * queue */
- RF_ASSERT(queue->queueLength >= 0);
- }
- if (!req)
- done = 1;
- else
- if (RF_LOCKING_REQ(req)) {
- if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */
- Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n", req->priority, queue->row, queue->col);
- RF_LOCK_QUEUE(queue);
- rf_DispatchKernelIO(queue, req);
- done = 1;
- } else { /* put it aside to wait for
- * the queue to drain */
- Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n", req->priority, queue->row, queue->col);
- RF_ASSERT(queue->nextLockingOp == NULL);
- queue->nextLockingOp = req;
- done = 1;
- }
- } else
- if (RF_UNLOCKING_REQ(req)) { /* should not happen:
- * unlocking ops should
- * not get queued */
- RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for
- * the future */
- Dprintf3("DiskIOComplete: dispatching pri %d unl req to r %d c %d (SHOULD NOT SEE THIS)\n", req->priority, queue->row, queue->col);
- rf_DispatchKernelIO(queue, req);
- done = 1;
- } else
- if (RF_OK_TO_DISPATCH(queue, req)) {
- Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n", req->priority, queue->row, queue->col);
- rf_DispatchKernelIO(queue, req);
- } else { /* we can't dispatch it,
- * so just re-enqueue
- * it. */
- /* potential trouble here if
- * disk queues batch reqs */
- Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n", req->priority, queue->row, queue->col);
- queue->queueLength++;
- (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority);
- done = 1;
- }
- }
-
- RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
-}
-/* promotes accesses tagged with the given parityStripeID from low priority
- * to normal priority. This promotion is optional, meaning that a queue
- * need not implement it. If there is no promotion routine associated with
- * a queue, this routine does nothing and returns -1.
- */
-int
-rf_DiskIOPromote(queue, parityStripeID, which_ru)
- RF_DiskQueue_t *queue;
- RF_StripeNum_t parityStripeID;
- RF_ReconUnitNum_t which_ru;
-{
- int retval;
-
- if (!queue->qPtr->Promote)
- return (-1);
- RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
- retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru);
- RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
- return (retval);
-}
-
-RF_DiskQueueData_t *
-rf_CreateDiskQueueData(
- RF_IoType_t typ,
- RF_SectorNum_t ssect,
- RF_SectorCount_t nsect,
- caddr_t buf,
- RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru,
- int (*wakeF) (void *, int),
- void *arg,
- RF_DiskQueueData_t * next,
- RF_AccTraceEntry_t * tracerec,
- void *raidPtr,
- RF_DiskQueueDataFlags_t flags,
- void *kb_proc)
-{
- RF_DiskQueueData_t *p;
-
- RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd);
-
- p->sectorOffset = ssect + rf_protectedSectors;
- p->numSector = nsect;
- p->type = typ;
- p->buf = buf;
- p->parityStripeID = parityStripeID;
- p->which_ru = which_ru;
- p->CompleteFunc = wakeF;
- p->argument = arg;
- p->next = next;
- p->tracerec = tracerec;
- p->priority = RF_IO_NORMAL_PRIORITY;
- p->AuxFunc = NULL;
- p->buf2 = NULL;
- p->raidPtr = raidPtr;
- p->flags = flags;
- p->b_proc = kb_proc;
- return (p);
-}
-
-RF_DiskQueueData_t *
-rf_CreateDiskQueueDataFull(
- RF_IoType_t typ,
- RF_SectorNum_t ssect,
- RF_SectorCount_t nsect,
- caddr_t buf,
- RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru,
- int (*wakeF) (void *, int),
- void *arg,
- RF_DiskQueueData_t * next,
- RF_AccTraceEntry_t * tracerec,
- int priority,
- int (*AuxFunc) (void *,...),
- caddr_t buf2,
- void *raidPtr,
- RF_DiskQueueDataFlags_t flags,
- void *kb_proc)
-{
- RF_DiskQueueData_t *p;
-
- RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd);
-
- p->sectorOffset = ssect + rf_protectedSectors;
- p->numSector = nsect;
- p->type = typ;
- p->buf = buf;
- p->parityStripeID = parityStripeID;
- p->which_ru = which_ru;
- p->CompleteFunc = wakeF;
- p->argument = arg;
- p->next = next;
- p->tracerec = tracerec;
- p->priority = priority;
- p->AuxFunc = AuxFunc;
- p->buf2 = buf2;
- p->raidPtr = raidPtr;
- p->flags = flags;
- p->b_proc = kb_proc;
- return (p);
-}
-
-void
-rf_FreeDiskQueueData(p)
- RF_DiskQueueData_t *p;
-{
- RF_FREELIST_FREE_CLEAN(rf_dqd_freelist, p, next, clean_dqd);
-}
diff --git a/sys/dev/raidframe/rf_diskqueue.h b/sys/dev/raidframe/rf_diskqueue.h
deleted file mode 100644
index 7b162b0..0000000
--- a/sys/dev/raidframe/rf_diskqueue.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_diskqueue.h,v 1.5 2000/02/13 04:53:57 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************************
- *
- * rf_diskqueue.h -- header file for disk queues
- *
- * see comments in rf_diskqueue.c
- *
- ****************************************************************************************/
-
-
-#ifndef _RF__RF_DISKQUEUE_H_
-#define _RF__RF_DISKQUEUE_H_
-
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_etimer.h>
-
-#include <dev/raidframe/rf_bsd.h>
-
-#define RF_IO_NORMAL_PRIORITY 1
-#define RF_IO_LOW_PRIORITY 0
-
-/* the data held by a disk queue entry */
-struct RF_DiskQueueData_s {
- RF_SectorNum_t sectorOffset; /* sector offset into the disk */
- RF_SectorCount_t numSector; /* number of sectors to read/write */
- RF_IoType_t type; /* read/write/nop */
- caddr_t buf; /* buffer pointer */
- RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this
- * access is for */
- RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */
- int priority; /* the priority of this request */
- int (*CompleteFunc) (void *, int); /* function to be called upon
- * completion */
- int (*AuxFunc) (void *,...); /* function called upon
- * completion of the first I/O
- * of a Read_Op_Write pair */
- void *argument; /* argument to be passed to CompleteFunc */
- RF_Raid_t *raidPtr; /* needed for simulation */
- RF_AccTraceEntry_t *tracerec; /* perf mon only */
- RF_Etimer_t qtime; /* perf mon only - time request is in queue */
- long entryTime;
- RF_DiskQueueData_t *next;
- RF_DiskQueueData_t *prev;
- caddr_t buf2; /* for read-op-write */
- dev_t dev; /* the device number for in-kernel version */
- RF_DiskQueue_t *queue; /* the disk queue to which this req is
- * targeted */
- RF_DiskQueueDataFlags_t flags; /* flags controlling operation */
-
- struct proc *b_proc; /* the b_proc from the original bp passed into
- * the driver for this I/O */
- /* XXX Should this be changed to the opaque
- * RF_Thread_t ? */
- RF_Buf_t bp; /* a bp to use to get this I/O done */
-};
-#define RF_LOCK_DISK_QUEUE 0x01
-#define RF_UNLOCK_DISK_QUEUE 0x02
-
-/* note: "Create" returns type-specific queue header pointer cast to (void *) */
-struct RF_DiskQueueSW_s {
- RF_DiskQueueType_t queueType;
- void *(*Create) (RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine --
- * one call per queue in
- * system */
- void (*Enqueue) (void *, RF_DiskQueueData_t *, int); /* enqueue routine */
- RF_DiskQueueData_t *(*Dequeue) (void *); /* dequeue routine */
- RF_DiskQueueData_t *(*Peek) (void *); /* peek at head of queue */
-
- /* the rest are optional: they improve performance, but the driver
- * will deal with it if they don't exist */
- int (*Promote) (void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of
- * tagged accesses */
-};
-
-struct RF_DiskQueue_s {
- RF_DiskQueueSW_t *qPtr; /* access point to queue functions */
- void *qHdr; /* queue header, of whatever type */
- RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */
- RF_DECLARE_COND(cond) /* condition variable for
- * synchronization */
- long numOutstanding; /* number of I/Os currently outstanding on
- * disk */
- long maxOutstanding; /* max # of I/Os that can be outstanding on a
- * disk (in-kernel only) */
- int curPriority; /* the priority of accs all that are currently
- * outstanding */
- long queueLength; /* number of requests in queue */
- RF_DiskQueueData_t *nextLockingOp; /* a locking op that has
- * arrived at the head of the
- * queue & is waiting for
- * drainage */
- RF_DiskQueueData_t *unlockingOp; /* used at user level to
- * communicate unlocking op
- * b/w user (or dag exec) &
- * disk threads */
- int numWaiting; /* number of threads waiting on this variable.
- * user-level only */
- RF_DiskQueueFlags_t flags; /* terminate, locked */
- RF_Raid_t *raidPtr; /* associated array */
- dev_t dev; /* device number for kernel version */
- RF_SectorNum_t last_deq_sector; /* last sector number dequeued or
- * dispatched */
- int row, col; /* debug only */
- struct raidcinfo *rf_cinfo; /* disks component info.. */
-};
-#define RF_DQ_LOCKED 0x02 /* no new accs allowed until queue is
- * explicitly unlocked */
-
-/* macros setting & returning information about queues and requests */
-#define RF_QUEUE_LOCKED(_q) ((_q)->flags & RF_DQ_LOCKED)
-#define RF_QUEUE_EMPTY(_q) (((_q)->numOutstanding == 0) && ((_q)->nextLockingOp == NULL) && !RF_QUEUE_LOCKED(_q))
-#define RF_QUEUE_FULL(_q) ((_q)->numOutstanding == (_q)->maxOutstanding)
-
-#define RF_LOCK_QUEUE(_q) (_q)->flags |= RF_DQ_LOCKED
-#define RF_UNLOCK_QUEUE(_q) (_q)->flags &= ~RF_DQ_LOCKED
-
-#define RF_LOCK_QUEUE_MUTEX(_q_,_wh_) RF_LOCK_MUTEX((_q_)->mutex)
-#define RF_UNLOCK_QUEUE_MUTEX(_q_,_wh_) RF_UNLOCK_MUTEX((_q_)->mutex)
-
-#define RF_LOCKING_REQ(_r) ((_r)->flags & RF_LOCK_DISK_QUEUE)
-#define RF_UNLOCKING_REQ(_r) ((_r)->flags & RF_UNLOCK_DISK_QUEUE)
-
-/* whether it is ok to dispatch a regular request */
-#define RF_OK_TO_DISPATCH(_q_,_r_) \
- (RF_QUEUE_EMPTY(_q_) || \
- (!RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority)))
-
-int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t ** listp);
-
-void rf_TerminateDiskQueues(RF_Raid_t * raidPtr);
-
-int
-rf_ConfigureDiskQueues(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-
-void rf_DiskIOEnqueue(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int pri);
-
-
-void rf_DiskIOComplete(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int status);
-
-int
-rf_DiskIOPromote(RF_DiskQueue_t * queue, RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru);
-
-RF_DiskQueueData_t *
-rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect,
- RF_SectorCount_t nsect, caddr_t buf,
- RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru,
- int (*wakeF) (void *, int),
- void *arg, RF_DiskQueueData_t * next,
- RF_AccTraceEntry_t * tracerec,
- void *raidPtr, RF_DiskQueueDataFlags_t flags,
- void *kb_proc);
-
-RF_DiskQueueData_t *
-rf_CreateDiskQueueDataFull(RF_IoType_t typ, RF_SectorNum_t ssect,
- RF_SectorCount_t nsect, caddr_t buf,
- RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru,
- int (*wakeF) (void *, int),
- void *arg, RF_DiskQueueData_t * next,
- RF_AccTraceEntry_t * tracerec,
- int priority, int (*AuxFunc) (void *,...),
- caddr_t buf2, void *raidPtr,
- RF_DiskQueueDataFlags_t flags, void *kb_proc);
-
-void
-rf_FreeDiskQueueData(RF_DiskQueueData_t * p);
-
-int
-rf_ConfigureDiskQueue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t,
- RF_RowCol_t, RF_DiskQueueSW_t *,
- RF_SectorCount_t, dev_t, int,
- RF_ShutdownList_t **,
- RF_AllocListElem_t *);
-#endif /* !_RF__RF_DISKQUEUE_H_ */
diff --git a/sys/dev/raidframe/rf_disks.c b/sys/dev/raidframe/rf_disks.c
deleted file mode 100644
index 14f72c2..0000000
--- a/sys/dev/raidframe/rf_disks.c
+++ /dev/null
@@ -1,1140 +0,0 @@
-/* $NetBSD: rf_disks.c,v 1.34 2000/12/05 01:35:56 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*-
- * Copyright (c) 1999 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Greg Oster
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/***************************************************************
- * rf_disks.c -- code to perform operations on the actual disks
- ***************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_kintf.h>
-#include <dev/raidframe/rf_bsd.h>
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#if defined(__NetBSD__)
-#include <sys/ioctl.h>
-#elif defined(__FreeBSD__)
-#include <sys/ioccom.h>
-#include <sys/filio.h>
-#endif
-#include <sys/fcntl.h>
-#include <sys/vnode.h>
-
-static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
-static void rf_print_label_status( RF_Raid_t *, int, int, char *,
- RF_ComponentLabel_t *);
-static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
- RF_ComponentLabel_t *, int, int );
-
-#define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f)
-#define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g)
-
-/**************************************************************************
- *
- * initialize the disks comprising the array
- *
- * We want the spare disks to have regular row,col numbers so that we can
- * easily substitue a spare for a failed disk. But, the driver code assumes
- * throughout that the array contains numRow by numCol _non-spare_ disks, so
- * it's not clear how to fit in the spares. This is an unfortunate holdover
- * from raidSim. The quick and dirty fix is to make row zero bigger than the
- * rest, and put all the spares in it. This probably needs to get changed
- * eventually.
- *
- **************************************************************************/
-
-int
-rf_ConfigureDisks( listp, raidPtr, cfgPtr )
- RF_ShutdownList_t **listp;
- RF_Raid_t *raidPtr;
- RF_Config_t *cfgPtr;
-{
- RF_RaidDisk_t **disks;
- RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
- RF_RowCol_t r, c;
- int bs, ret;
- unsigned i, count, foundone = 0, numFailuresThisRow;
- int force;
-
- force = cfgPtr->force;
-
- ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
- if (ret)
- goto fail;
-
- disks = raidPtr->Disks;
-
- for (r = 0; r < raidPtr->numRow; r++) {
- numFailuresThisRow = 0;
- for (c = 0; c < raidPtr->numCol; c++) {
- ret = rf_ConfigureDisk(raidPtr,
- &cfgPtr->devnames[r][c][0],
- &disks[r][c], r, c);
-
- if (ret)
- goto fail;
-
- if (disks[r][c].status == rf_ds_optimal) {
- raidread_component_label(
- raidPtr->raid_cinfo[r][c].ci_dev,
- raidPtr->raid_cinfo[r][c].ci_vp,
- &raidPtr->raid_cinfo[r][c].ci_label);
- }
-
- if (disks[r][c].status != rf_ds_optimal) {
- numFailuresThisRow++;
- } else {
- if (disks[r][c].numBlocks < min_numblks)
- min_numblks = disks[r][c].numBlocks;
- DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
- r, c, disks[r][c].devname,
- (long int) disks[r][c].numBlocks,
- disks[r][c].blockSize,
- (long int) disks[r][c].numBlocks *
- disks[r][c].blockSize / 1024 / 1024);
- }
- }
- /* XXX fix for n-fault tolerant */
- /* XXX this should probably check to see how many failures
- we can handle for this configuration! */
- if (numFailuresThisRow > 0)
- raidPtr->status[r] = rf_rs_degraded;
- }
-
- /* all disks must be the same size & have the same block size, bs must
- * be a power of 2 */
- bs = 0;
- for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
- for (c = 0; !foundone && c < raidPtr->numCol; c++) {
- if (disks[r][c].status == rf_ds_optimal) {
- bs = disks[r][c].blockSize;
- foundone = 1;
- }
- }
- }
- if (!foundone) {
- RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
- ret = EINVAL;
- goto fail;
- }
- for (count = 0, i = 1; i; i <<= 1)
- if (bs & i)
- count++;
- if (count != 1) {
- RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
- ret = EINVAL;
- goto fail;
- }
-
- if (rf_CheckLabels( raidPtr, cfgPtr )) {
- printf("raid%d: There were fatal errors\n", raidPtr->raidid);
- if (force != 0) {
- printf("raid%d: Fatal errors being ignored.\n",
- raidPtr->raidid);
- } else {
- ret = EINVAL;
- goto fail;
- }
- }
-
- for (r = 0; r < raidPtr->numRow; r++) {
- for (c = 0; c < raidPtr->numCol; c++) {
- if (disks[r][c].status == rf_ds_optimal) {
- if (disks[r][c].blockSize != bs) {
- RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
- ret = EINVAL;
- goto fail;
- }
- if (disks[r][c].numBlocks != min_numblks) {
- RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
- r, c, (int) min_numblks);
- disks[r][c].numBlocks = min_numblks;
- }
- }
- }
- }
-
- raidPtr->sectorsPerDisk = min_numblks;
- raidPtr->logBytesPerSector = ffs(bs) - 1;
- raidPtr->bytesPerSector = bs;
- raidPtr->sectorMask = bs - 1;
- return (0);
-
-fail:
-
- rf_UnconfigureVnodes( raidPtr );
-
- return (ret);
-}
-
-
-/****************************************************************************
- * set up the data structures describing the spare disks in the array
- * recall from the above comment that the spare disk descriptors are stored
- * in row zero, which is specially expanded to hold them.
- ****************************************************************************/
-int
-rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
- RF_ShutdownList_t ** listp;
- RF_Raid_t * raidPtr;
- RF_Config_t * cfgPtr;
-{
- int i, ret;
- unsigned int bs;
- RF_RaidDisk_t *disks;
- int num_spares_done;
-
- num_spares_done = 0;
-
- /* The space for the spares should have already been allocated by
- * ConfigureDisks() */
-
- disks = &raidPtr->Disks[0][raidPtr->numCol];
- for (i = 0; i < raidPtr->numSpare; i++) {
- ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
- &disks[i], 0, raidPtr->numCol + i);
- if (ret)
- goto fail;
- if (disks[i].status != rf_ds_optimal) {
- RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
- &cfgPtr->spare_names[i][0]);
- } else {
- disks[i].status = rf_ds_spare; /* change status to
- * spare */
- DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
- disks[i].devname,
- (long int) disks[i].numBlocks, disks[i].blockSize,
- (long int) disks[i].numBlocks *
- disks[i].blockSize / 1024 / 1024);
- }
- num_spares_done++;
- }
-
- /* check sizes and block sizes on spare disks */
- bs = 1 << raidPtr->logBytesPerSector;
- for (i = 0; i < raidPtr->numSpare; i++) {
- if (disks[i].blockSize != bs) {
- RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
- ret = EINVAL;
- goto fail;
- }
- if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
- RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
- disks[i].devname, disks[i].blockSize,
- (long int) raidPtr->sectorsPerDisk);
- ret = EINVAL;
- goto fail;
- } else
- if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
- RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
-
- disks[i].numBlocks = raidPtr->sectorsPerDisk;
- }
- }
-
- return (0);
-
-fail:
-
- /* Release the hold on the main components. We've failed to allocate
- * a spare, and since we're failing, we need to free things..
-
- XXX failing to allocate a spare is *not* that big of a deal...
- We *can* survive without it, if need be, esp. if we get hot
- adding working.
-
- If we don't fail out here, then we need a way to remove this spare...
- that should be easier to do here than if we are "live"...
-
- */
-
- rf_UnconfigureVnodes( raidPtr );
-
- return (ret);
-}
-
-static int
-rf_AllocDiskStructures(raidPtr, cfgPtr)
- RF_Raid_t *raidPtr;
- RF_Config_t *cfgPtr;
-{
- RF_RaidDisk_t **disks;
- int ret;
- int r;
-
- RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
- (RF_RaidDisk_t **), raidPtr->cleanupList);
- if (disks == NULL) {
- ret = ENOMEM;
- goto fail;
- }
- raidPtr->Disks = disks;
- /* get space for the device-specific stuff... */
- RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
- sizeof(struct raidcinfo *), (struct raidcinfo **),
- raidPtr->cleanupList);
- if (raidPtr->raid_cinfo == NULL) {
- ret = ENOMEM;
- goto fail;
- }
-
- for (r = 0; r < raidPtr->numRow; r++) {
- /* We allocate RF_MAXSPARE on the first row so that we
- have room to do hot-swapping of spares */
- RF_CallocAndAdd(disks[r], raidPtr->numCol
- + ((r == 0) ? RF_MAXSPARE : 0),
- sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
- raidPtr->cleanupList);
- if (disks[r] == NULL) {
- ret = ENOMEM;
- goto fail;
- }
- /* get more space for device specific stuff.. */
- RF_CallocAndAdd(raidPtr->raid_cinfo[r],
- raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
- sizeof(struct raidcinfo), (struct raidcinfo *),
- raidPtr->cleanupList);
- if (raidPtr->raid_cinfo[r] == NULL) {
- ret = ENOMEM;
- goto fail;
- }
- }
- return(0);
-fail:
- rf_UnconfigureVnodes( raidPtr );
-
- return(ret);
-}
-
-
-/* configure a single disk during auto-configuration at boot */
-int
-rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
- RF_Raid_t *raidPtr;
- RF_Config_t *cfgPtr;
- RF_AutoConfig_t *auto_config;
-{
- RF_RaidDisk_t **disks;
- RF_RaidDisk_t *diskPtr;
- RF_RowCol_t r, c;
- RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
- int bs, ret;
- int numFailuresThisRow;
- int force;
- RF_AutoConfig_t *ac;
- int parity_good;
- int mod_counter;
- int mod_counter_found;
-
- rf_printf(0, "Starting autoconfiguration of RAID set...\n");
- force = cfgPtr->force;
-
- ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
- if (ret)
- goto fail;
-
- disks = raidPtr->Disks;
-
- /* assume the parity will be fine.. */
- parity_good = RF_RAID_CLEAN;
-
- /* Check for mod_counters that are too low */
- mod_counter_found = 0;
- mod_counter = 0;
- ac = auto_config;
- while(ac!=NULL) {
- if (mod_counter_found==0) {
- mod_counter = ac->clabel->mod_counter;
- mod_counter_found = 1;
- } else {
- if (ac->clabel->mod_counter > mod_counter) {
- mod_counter = ac->clabel->mod_counter;
- }
- }
- ac->flag = 0; /* clear the general purpose flag */
- ac = ac->next;
- }
-
- bs = 0;
- for (r = 0; r < raidPtr->numRow; r++) {
- numFailuresThisRow = 0;
- for (c = 0; c < raidPtr->numCol; c++) {
- diskPtr = &disks[r][c];
-
- /* find this row/col in the autoconfig */
- rf_printf(1, "Looking for %d,%d in autoconfig\n",r,c);
- ac = auto_config;
- while(ac!=NULL) {
- if (ac->clabel==NULL) {
- /* big-time bad news. */
- goto fail;
- }
- if ((ac->clabel->row == r) &&
- (ac->clabel->column == c) &&
- (ac->clabel->mod_counter == mod_counter)) {
- /* it's this one... */
- /* flag it as 'used', so we don't
- free it later. */
- ac->flag = 1;
- rf_printf(1, "Found: %s at %d,%d\n",
- ac->devname, r, c);
- break;
- }
- ac=ac->next;
- }
-
- if (ac==NULL) {
- /* we didn't find an exact match with a
- correct mod_counter above... can we
- find one with an incorrect mod_counter
- to use instead? (this one, if we find
- it, will be marked as failed once the
- set configures)
- */
-
- ac = auto_config;
- while(ac!=NULL) {
- if (ac->clabel==NULL) {
- /* big-time bad news. */
- goto fail;
- }
- if ((ac->clabel->row == r) &&
- (ac->clabel->column == c)) {
- /* it's this one...
- flag it as 'used', so we
- don't free it later. */
- ac->flag = 1;
- rf_printf(1, "Found(low mod_counter): %s at %d,%d\n",
- ac->devname,r,c);
-
- break;
- }
- ac=ac->next;
- }
- }
-
-
-
- if (ac!=NULL) {
- /* Found it. Configure it.. */
- diskPtr->blockSize = ac->clabel->blockSize;
- diskPtr->numBlocks = ac->clabel->numBlocks;
- /* Note: rf_protectedSectors is already
- factored into numBlocks here */
- raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
- raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
-
- memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
- ac->clabel, sizeof(*ac->clabel));
- sprintf(diskPtr->devname, "/dev/%s",
- ac->devname);
-
- /* note the fact that this component was
- autoconfigured. You'll need this info
- later. Trust me :) */
- diskPtr->auto_configured = 1;
- diskPtr->dev = ac->dev;
-
- /*
- * we allow the user to specify that
- * only a fraction of the disks should
- * be used this is just for debug: it
- * speeds up the parity scan
- */
-
- diskPtr->numBlocks = diskPtr->numBlocks *
- rf_sizePercentage / 100;
-
- /* XXX these will get set multiple times,
- but since we're autoconfiguring, they'd
- better be always the same each time!
- If not, this is the least of your worries */
-
- bs = diskPtr->blockSize;
- min_numblks = diskPtr->numBlocks;
-
- /* this gets done multiple times, but that's
- fine -- the serial number will be the same
- for all components, guaranteed */
- raidPtr->serial_number =
- ac->clabel->serial_number;
- /* check the last time the label
- was modified */
- if (ac->clabel->mod_counter !=
- mod_counter) {
- /* Even though we've filled in all
- of the above, we don't trust
- this component since it's
- modification counter is not
- in sync with the rest, and we really
- consider it to be failed. */
- disks[r][c].status = rf_ds_failed;
- numFailuresThisRow++;
- } else {
- if (ac->clabel->clean !=
- RF_RAID_CLEAN) {
- parity_good = RF_RAID_DIRTY;
- }
- }
- } else {
- /* Didn't find it at all!!
- Component must really be dead */
- disks[r][c].status = rf_ds_failed;
- sprintf(disks[r][c].devname,"component%d",
- r * raidPtr->numCol + c);
- numFailuresThisRow++;
- }
- }
- /* XXX fix for n-fault tolerant */
- /* XXX this should probably check to see how many failures
- we can handle for this configuration! */
- if (numFailuresThisRow > 0)
- raidPtr->status[r] = rf_rs_degraded;
- }
-
- /* close the device for the ones that didn't get used */
-
- ac = auto_config;
- while(ac!=NULL) {
- if (ac->flag == 0) {
-#if defined(__NetBSD__)
- vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
-#elif defined(__FreeBSD__)
- vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY,
- raidPtr->engine_thread);
-#endif
- VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
- vput(ac->vp);
- ac->vp = NULL;
- rf_printf(1, "Released %s from auto-config set.\n",
- ac->devname);
- }
- ac = ac->next;
- }
-
- raidPtr->mod_counter = mod_counter;
-
- /* note the state of the parity, if any */
- raidPtr->parity_good = parity_good;
- raidPtr->sectorsPerDisk = min_numblks;
- raidPtr->logBytesPerSector = ffs(bs) - 1;
- raidPtr->bytesPerSector = bs;
- raidPtr->sectorMask = bs - 1;
- return (0);
-
-fail:
-
- rf_UnconfigureVnodes( raidPtr );
-
- return (ret);
-
-}
-
-/* configure a single disk in the array */
-int
-rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
- RF_Raid_t *raidPtr;
- char *buf;
- RF_RaidDisk_t *diskPtr;
- RF_RowCol_t row;
- RF_RowCol_t col;
-{
- char *p;
- int retcode;
-
- int error;
-
- retcode = 0;
- p = rf_find_non_white(buf);
- if (p[strlen(p) - 1] == '\n') {
- /* strip off the newline */
- p[strlen(p) - 1] = '\0';
- }
- (void) strcpy(diskPtr->devname, p);
-
- /* Let's start by claiming the component is fine and well... */
- diskPtr->status = rf_ds_optimal;
-
- raidPtr->raid_cinfo[row][col].ci_vp = NULL;
- raidPtr->raid_cinfo[row][col].ci_dev = NULL;
-
- error = raid_getcomponentsize(raidPtr, row, col);
- if (error) {
- printf("raidlookup on device: %s failed!\n", diskPtr->devname);
- if (error == ENXIO) {
- /* the component isn't there... must be dead :-( */
- diskPtr->status = rf_ds_failed;
- return (error);
- }
- }
- return (0);
-}
-
-static void
-rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
- RF_Raid_t *raidPtr;
- int row;
- int column;
- char *dev_name;
- RF_ComponentLabel_t *ci_label;
-{
-
- printf("raid%d: Component %s being configured at row: %d col: %d\n",
- raidPtr->raidid, dev_name, row, column );
- printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
- ci_label->row, ci_label->column,
- ci_label->num_rows, ci_label->num_columns);
- printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
- ci_label->version, ci_label->serial_number,
- ci_label->mod_counter);
- printf(" Clean: %s Status: %d\n",
- ci_label->clean ? "Yes" : "No", ci_label->status );
-}
-
-static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
- serial_number, mod_counter )
- RF_Raid_t *raidPtr;
- int row;
- int column;
- char *dev_name;
- RF_ComponentLabel_t *ci_label;
- int serial_number;
- int mod_counter;
-{
- int fatal_error = 0;
-
- if (serial_number != ci_label->serial_number) {
- printf("%s has a different serial number: %d %d\n",
- dev_name, serial_number, ci_label->serial_number);
- fatal_error = 1;
- }
- if (mod_counter != ci_label->mod_counter) {
- printf("%s has a different modfication count: %d %d\n",
- dev_name, mod_counter, ci_label->mod_counter);
- }
-
- if (row != ci_label->row) {
- printf("Row out of alignment for: %s\n", dev_name);
- fatal_error = 1;
- }
- if (column != ci_label->column) {
- printf("Column out of alignment for: %s\n", dev_name);
- fatal_error = 1;
- }
- if (raidPtr->numRow != ci_label->num_rows) {
- printf("Number of rows do not match for: %s\n", dev_name);
- fatal_error = 1;
- }
- if (raidPtr->numCol != ci_label->num_columns) {
- printf("Number of columns do not match for: %s\n", dev_name);
- fatal_error = 1;
- }
- if (ci_label->clean == 0) {
- /* it's not clean, but that's not fatal */
- printf("%s is not clean!\n", dev_name);
- }
- return(fatal_error);
-}
-
-
-/*
-
- rf_CheckLabels() - check all the component labels for consistency.
- Return an error if there is anything major amiss.
-
- */
-
-int
-rf_CheckLabels( raidPtr, cfgPtr )
- RF_Raid_t *raidPtr;
- RF_Config_t *cfgPtr;
-{
- int r,c;
- char *dev_name;
- RF_ComponentLabel_t *ci_label;
- int serial_number = 0;
- int mod_number = 0;
- int fatal_error = 0;
- int mod_values[4];
- int mod_count[4];
- int ser_values[4];
- int ser_count[4];
- int num_ser;
- int num_mod;
- int i;
- int found;
- int hosed_row;
- int hosed_column;
- int too_fatal;
- int parity_good;
- int force;
-
- hosed_row = -1;
- hosed_column = -1;
- too_fatal = 0;
- force = cfgPtr->force;
-
- /*
- We're going to try to be a little intelligent here. If one
- component's label is bogus, and we can identify that it's the
- *only* one that's gone, we'll mark it as "failed" and allow
- the configuration to proceed. This will be the *only* case
- that we'll proceed if there would be (otherwise) fatal errors.
-
- Basically we simply keep a count of how many components had
- what serial number. If all but one agree, we simply mark
- the disagreeing component as being failed, and allow
- things to come up "normally".
-
- We do this first for serial numbers, and then for "mod_counter".
-
- */
-
- num_ser = 0;
- num_mod = 0;
- for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
- for (c = 0; c < raidPtr->numCol; c++) {
- ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
- found=0;
- for(i=0;i<num_ser;i++) {
- if (ser_values[i] == ci_label->serial_number) {
- ser_count[i]++;
- found=1;
- break;
- }
- }
- if (!found) {
- ser_values[num_ser] = ci_label->serial_number;
- ser_count[num_ser] = 1;
- num_ser++;
- if (num_ser>2) {
- fatal_error = 1;
- break;
- }
- }
- found=0;
- for(i=0;i<num_mod;i++) {
- if (mod_values[i] == ci_label->mod_counter) {
- mod_count[i]++;
- found=1;
- break;
- }
- }
- if (!found) {
- mod_values[num_mod] = ci_label->mod_counter;
- mod_count[num_mod] = 1;
- num_mod++;
- if (num_mod>2) {
- fatal_error = 1;
- break;
- }
- }
- }
- }
- rf_printf(1, "raid%d: Summary of serial numbers:\n", raidPtr->raidid);
- for(i=0;i<num_ser;i++) {
- rf_printf(1, "%d %d\n", ser_values[i], ser_count[i]);
- }
- rf_printf(1, "raid%d: Summary of mod counters:\n", raidPtr->raidid);
- for(i=0;i<num_mod;i++) {
- rf_printf(1, "%d %d\n", mod_values[i], mod_count[i]);
- }
- serial_number = ser_values[0];
- if (num_ser == 2) {
- if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
- /* Locate the maverick component */
- if (ser_count[1] > ser_count[0]) {
- serial_number = ser_values[1];
- }
- for (r = 0; r < raidPtr->numRow; r++) {
- for (c = 0; c < raidPtr->numCol; c++) {
- ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
- if (serial_number !=
- ci_label->serial_number) {
- hosed_row = r;
- hosed_column = c;
- break;
- }
- }
- }
- printf("Hosed component: %s\n",
- &cfgPtr->devnames[hosed_row][hosed_column][0]);
- if (!force) {
- /* we'll fail this component, as if there are
- other major errors, we arn't forcing things
- and we'll abort the config anyways */
- raidPtr->Disks[hosed_row][hosed_column].status
- = rf_ds_failed;
- raidPtr->numFailures++;
- raidPtr->status[hosed_row] = rf_rs_degraded;
- }
- } else {
- too_fatal = 1;
- }
- if (cfgPtr->parityConfig == '0') {
- /* We've identified two different serial numbers.
- RAID 0 can't cope with that, so we'll punt */
- too_fatal = 1;
- }
-
- }
-
- /* record the serial number for later. If we bail later, setting
- this doesn't matter, otherwise we've got the best guess at the
- correct serial number */
- raidPtr->serial_number = serial_number;
-
- mod_number = mod_values[0];
- if (num_mod == 2) {
- if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
- /* Locate the maverick component */
- if (mod_count[1] > mod_count[0]) {
- mod_number = mod_values[1];
- } else if (mod_count[1] < mod_count[0]) {
- mod_number = mod_values[0];
- } else {
- /* counts of different modification values
- are the same. Assume greater value is
- the correct one, all other things
- considered */
- if (mod_values[0] > mod_values[1]) {
- mod_number = mod_values[0];
- } else {
- mod_number = mod_values[1];
- }
-
- }
- for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
- for (c = 0; c < raidPtr->numCol; c++) {
- ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
- if (mod_number !=
- ci_label->mod_counter) {
- if ( ( hosed_row == r ) &&
- ( hosed_column == c )) {
- /* same one. Can
- deal with it. */
- } else {
- hosed_row = r;
- hosed_column = c;
- if (num_ser != 1) {
- too_fatal = 1;
- break;
- }
- }
- }
- }
- }
- printf("Hosed component: %s\n",
- &cfgPtr->devnames[hosed_row][hosed_column][0]);
- if (!force) {
- /* we'll fail this component, as if there are
- other major errors, we arn't forcing things
- and we'll abort the config anyways */
- if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
- raidPtr->Disks[hosed_row][hosed_column].status
- = rf_ds_failed;
- raidPtr->numFailures++;
- raidPtr->status[hosed_row] = rf_rs_degraded;
- }
- }
- } else {
- too_fatal = 1;
- }
- if (cfgPtr->parityConfig == '0') {
- /* We've identified two different mod counters.
- RAID 0 can't cope with that, so we'll punt */
- too_fatal = 1;
- }
- }
-
- raidPtr->mod_counter = mod_number;
-
- if (too_fatal) {
- /* we've had both a serial number mismatch, and a mod_counter
- mismatch -- and they involved two different components!!
- Bail -- make things fail so that the user must force
- the issue... */
- hosed_row = -1;
- hosed_column = -1;
- }
-
- if (num_ser > 2) {
- printf("raid%d: Too many different serial numbers!\n",
- raidPtr->raidid);
- }
-
- if (num_mod > 2) {
- printf("raid%d: Too many different mod counters!\n",
- raidPtr->raidid);
- }
-
- /* we start by assuming the parity will be good, and flee from
- that notion at the slightest sign of trouble */
-
- parity_good = RF_RAID_CLEAN;
- for (r = 0; r < raidPtr->numRow; r++) {
- for (c = 0; c < raidPtr->numCol; c++) {
- dev_name = &cfgPtr->devnames[r][c][0];
- ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
-
- if ((r == hosed_row) && (c == hosed_column)) {
- printf("raid%d: Ignoring %s\n",
- raidPtr->raidid, dev_name);
- } else {
- rf_print_label_status( raidPtr, r, c,
- dev_name, ci_label );
- if (rf_check_label_vitals( raidPtr, r, c,
- dev_name, ci_label,
- serial_number,
- mod_number )) {
- fatal_error = 1;
- }
- if (ci_label->clean != RF_RAID_CLEAN) {
- parity_good = RF_RAID_DIRTY;
- }
- }
- }
- }
- if (fatal_error) {
- parity_good = RF_RAID_DIRTY;
- }
-
- /* we note the state of the parity */
- raidPtr->parity_good = parity_good;
-
- return(fatal_error);
-}
-
-int
-rf_add_hot_spare(raidPtr, sparePtr)
- RF_Raid_t *raidPtr;
- RF_SingleComponent_t *sparePtr;
-{
- RF_RaidDisk_t *disks;
- RF_DiskQueue_t *spareQueues;
- int ret;
- unsigned int bs;
- int spare_number;
-
-#if 0
- printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
- printf("Num col: %d\n",raidPtr->numCol);
-#endif
- if (raidPtr->numSpare >= RF_MAXSPARE) {
- RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
- return(EINVAL);
- }
-
- RF_LOCK_MUTEX(raidPtr->mutex);
-
- /* the beginning of the spares... */
- disks = &raidPtr->Disks[0][raidPtr->numCol];
-
- spare_number = raidPtr->numSpare;
-
- ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
- &disks[spare_number], 0,
- raidPtr->numCol + spare_number);
-
- if (ret)
- goto fail;
- if (disks[spare_number].status != rf_ds_optimal) {
- RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
- sparePtr->component_name);
- ret=EINVAL;
- goto fail;
- } else {
- disks[spare_number].status = rf_ds_spare;
- DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
- disks[spare_number].devname,
- (long int) disks[spare_number].numBlocks,
- disks[spare_number].blockSize,
- (long int) disks[spare_number].numBlocks *
- disks[spare_number].blockSize / 1024 / 1024);
- }
-
-
- /* check sizes and block sizes on the spare disk */
- bs = 1 << raidPtr->logBytesPerSector;
- if (disks[spare_number].blockSize != bs) {
- RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
- ret = EINVAL;
- goto fail;
- }
- if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
- RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
- disks[spare_number].devname,
- disks[spare_number].blockSize,
- (long int) raidPtr->sectorsPerDisk);
- ret = EINVAL;
- goto fail;
- } else {
- if (disks[spare_number].numBlocks >
- raidPtr->sectorsPerDisk) {
- RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
- (long int) raidPtr->sectorsPerDisk);
-
- disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
- }
- }
-
- spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
- ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
- 0, raidPtr->numCol + spare_number,
- raidPtr->qType,
- raidPtr->sectorsPerDisk,
- raidPtr->Disks[0][raidPtr->numCol +
- spare_number].dev,
- raidPtr->maxOutstanding,
- &raidPtr->shutdownList,
- raidPtr->cleanupList);
-
-
- raidPtr->numSpare++;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- return (0);
-
-fail:
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- return(ret);
-}
-
-int
-rf_remove_hot_spare(raidPtr,sparePtr)
- RF_Raid_t *raidPtr;
- RF_SingleComponent_t *sparePtr;
-{
- int spare_number;
-
-
- if (raidPtr->numSpare==0) {
- printf("No spares to remove!\n");
- return(EINVAL);
- }
-
- spare_number = sparePtr->column;
-
- return(EINVAL); /* XXX not implemented yet */
-#if 0
- if (spare_number < 0 || spare_number > raidPtr->numSpare) {
- return(EINVAL);
- }
-
- /* verify that this spare isn't in use... */
-
-
-
-
- /* it's gone.. */
-
- raidPtr->numSpare--;
-
- return(0);
-#endif
-}
-
-
-int
-rf_delete_component(raidPtr,component)
- RF_Raid_t *raidPtr;
- RF_SingleComponent_t *component;
-{
- RF_RaidDisk_t *disks;
-
- if ((component->row < 0) ||
- (component->row >= raidPtr->numRow) ||
- (component->column < 0) ||
- (component->column >= raidPtr->numCol)) {
- return(EINVAL);
- }
-
- disks = &raidPtr->Disks[component->row][component->column];
-
- /* 1. This component must be marked as 'failed' */
-
- return(EINVAL); /* Not implemented yet. */
-}
-
-int
-rf_incorporate_hot_spare(raidPtr,component)
- RF_Raid_t *raidPtr;
- RF_SingleComponent_t *component;
-{
-
- /* Issues here include how to 'move' this in if there is IO
- taking place (e.g. component queues and such) */
-
- return(EINVAL); /* Not implemented yet. */
-}
diff --git a/sys/dev/raidframe/rf_disks.h b/sys/dev/raidframe/rf_disks.h
deleted file mode 100644
index b57c4f8..0000000
--- a/sys/dev/raidframe/rf_disks.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_disks.h,v 1.8 2000/03/27 03:25:17 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_disks.h -- header file for code related to physical disks
- */
-
-#ifndef _RF__RF_DISKS_H_
-#define _RF__RF_DISKS_H_
-
-#include <sys/types.h>
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_bsd.h>
-
-/*
- * A physical disk can be in one of several states:
- * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW.
- */
-enum RF_DiskStatus_e {
- rf_ds_optimal, /* no problems */
- rf_ds_failed, /* reconstruction ongoing */
- rf_ds_reconstructing, /* reconstruction complete to spare, dead disk
- * not yet replaced */
- rf_ds_dist_spared, /* reconstruction complete to distributed
- * spare space, dead disk not yet replaced */
- rf_ds_spared, /* reconstruction complete to distributed
- * spare space, dead disk not yet replaced */
- rf_ds_spare, /* an available spare disk */
- rf_ds_used_spare /* a spare which has been used, and hence is
- * not available */
-};
-typedef enum RF_DiskStatus_e RF_DiskStatus_t;
-
-struct RF_RaidDisk_s {
- char devname[56]; /* name of device file */
- RF_DiskStatus_t status; /* whether it is up or down */
- RF_RowCol_t spareRow; /* if in status "spared", this identifies the
- * spare disk */
- RF_RowCol_t spareCol; /* if in status "spared", this identifies the
- * spare disk */
- RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ
- * CAPACITY */
- int blockSize;
- RF_SectorCount_t partitionSize; /* The *actual* and *full* size of
- the partition, from the disklabel */
- int auto_configured;/* 1 if this component was autoconfigured.
- 0 otherwise. */
- dev_t dev;
-};
-/*
- * An RF_DiskOp_t ptr is really a pointer to a UAGT_CCB, but I want
- * to isolate the cam layer from all other layers, so I typecast to/from
- * RF_DiskOp_t * (i.e. void *) at the interfaces.
- */
-typedef void RF_DiskOp_t;
-
-/* if a disk is in any of these states, it is inaccessible */
-#define RF_DEAD_DISK(_dstat_) (((_dstat_) == rf_ds_spared) || \
- ((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \
- ((_dstat_) == rf_ds_dist_spared))
-
-#ifdef _KERNEL
-#include <dev/raidframe/rf_bsd.h>
-
-int rf_ConfigureDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int rf_ConfigureDisk(RF_Raid_t * raidPtr, char *buf, RF_RaidDisk_t * diskPtr,
- RF_RowCol_t row, RF_RowCol_t col);
-int rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
- RF_AutoConfig_t *auto_config);
-int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
-int rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr);
-int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr);
-int rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component);
-int rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
- RF_SingleComponent_t *component);
-#endif /* _KERNEL */
-#endif /* !_RF__RF_DISKS_H_ */
diff --git a/sys/dev/raidframe/rf_driver.c b/sys/dev/raidframe/rf_driver.c
deleted file mode 100644
index 9534132..0000000
--- a/sys/dev/raidframe/rf_driver.c
+++ /dev/null
@@ -1,1050 +0,0 @@
-/* $NetBSD: rf_driver.c,v 1.39 2000/12/15 02:12:58 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*-
- * Copyright (c) 1999 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Greg Oster
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II,
- * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/******************************************************************************
- *
- * rf_driver.c -- main setup, teardown, and access routines for the RAID driver
- *
- * all routines are prefixed with rf_ (raidframe), to avoid conficts.
- *
- ******************************************************************************/
-
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#if defined(__NetBSD__)
-#include <sys/ioctl.h>
-#elif defined(__FreeBSD__)
-#include <sys/ioccom.h>
-#include <sys/filio.h>
-#endif
-#include <sys/fcntl.h>
-#include <sys/vnode.h>
-
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_threadstuff.h>
-
-#include <sys/errno.h>
-
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_aselect.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_states.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_decluster.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_revent.h>
-#include <dev/raidframe/rf_callback.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_memchunk.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_nwayxor.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_copyback.h>
-#include <dev/raidframe/rf_driver.h>
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_shutdown.h>
-#include <dev/raidframe/rf_kintf.h>
-
-#if defined(__FreeBSD__) && __FreeBSD_version > 500005
-#include <sys/bio.h>
-#endif
-
-#include <sys/buf.h>
-
-/* rad == RF_RaidAccessDesc_t */
-static RF_FreeList_t *rf_rad_freelist;
-#define RF_MAX_FREE_RAD 128
-#define RF_RAD_INC 16
-#define RF_RAD_INITIAL 32
-
-/* debug variables */
-char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */
-
-/* main configuration routines */
-static int raidframe_booted = 0;
-
-static void rf_ConfigureDebug(RF_Config_t * cfgPtr);
-static void set_debug_option(char *name, long val);
-static void rf_UnconfigureArray(void);
-static int init_rad(RF_RaidAccessDesc_t *);
-static void clean_rad(RF_RaidAccessDesc_t *);
-static void rf_ShutdownRDFreeList(void *);
-static int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
-
-RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved
- * printfs by different stripes */
-
-#define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended))
-#define WAIT_FOR_QUIESCENCE(_raid_) \
- RF_LTSLEEP(&((_raid_)->accesses_suspended), PRIBIO, \
- "raidframe quiesce", 0, &((_raid_)->access_suspend_mutex))
-
-#if defined(__FreeBSD__) && __FreeBSD_version > 500005
-#define IO_BUF_ERR(bp, err) { \
- bp->bio_flags |= BIO_ERROR; \
- bp->bio_resid = bp->bio_bcount; \
- bp->bio_error = err; \
- biodone(bp); \
-};
-#else
-#define IO_BUF_ERR(bp, err) { \
- bp->b_flags |= B_ERROR; \
- bp->b_resid = bp->b_bcount; \
- bp->b_error = err; \
- biodone(bp); \
-}
-#endif
-
-static int configureCount = 0; /* number of active configurations */
-static int configInProgress = 0; /* configuration is in progress and code
- * needs to be serialized. */
-static int isconfigged = 0; /* is basic raidframe (non per-array)
- * stuff configged */
-RF_DECLARE_STATIC_MUTEX(configureMutex) /* used to lock the configuration
- * stuff */
-static RF_ShutdownList_t *globalShutdown; /* non array-specific
- * stuff */
-
-/* called at system boot time */
-int
-rf_BootRaidframe()
-{
- int rc;
-
- if (raidframe_booted)
- return (EBUSY);
- raidframe_booted = 1;
-
- rc = rf_mutex_init(&configureMutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- RF_PANIC();
- }
- configureCount = 0;
- isconfigged = 0;
- globalShutdown = NULL;
- return (0);
-}
-/*
- * This function is really just for debugging user-level stuff: it
- * frees up all memory, other RAIDframe resources which might otherwise
- * be kept around. This is used with systems like "sentinel" to detect
- * memory leaks.
- */
-int
-rf_UnbootRaidframe()
-{
- int rc;
-
- RF_LOCK_MUTEX(configureMutex);
- if (configureCount) {
- RF_UNLOCK_MUTEX(configureMutex);
- return (EBUSY);
- }
- raidframe_booted = 0;
- RF_UNLOCK_MUTEX(configureMutex);
- rc = rf_mutex_destroy(&configureMutex);
- if (rc) {
- RF_ERRORMSG3("Unable to destroy mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- RF_PANIC();
- }
- return (0);
-}
-/*
- * Called whenever an array is shutdown
- */
-static void
-rf_UnconfigureArray()
-{
- int rc;
-
- RF_LOCK_MUTEX(configureMutex);
- if (--configureCount == 0) { /* if no active configurations, shut
- * everything down */
- isconfigged = 0;
-
- rc = rf_ShutdownList(&globalShutdown);
- if (rc) {
- RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc);
- }
-
- /*
- * We must wait until now, because the AllocList module
- * uses the DebugMem module.
- */
- if (rf_memDebug)
- rf_print_unfreed();
- }
- RF_UNLOCK_MUTEX(configureMutex);
-}
-
-/*
- * Called to shut down an array.
- */
-int
-rf_Shutdown(raidPtr)
- RF_Raid_t *raidPtr;
-{
-
- if (!raidPtr->valid) {
- RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n");
- return (EINVAL);
- }
- /*
- * wait for outstanding IOs to land
- * As described in rf_raid.h, we use the rad_freelist lock
- * to protect the per-array info about outstanding descs
- * since we need to do freelist locking anyway, and this
- * cuts down on the amount of serialization we've got going
- * on.
- */
- RF_FREELIST_DO_LOCK(rf_rad_freelist);
- if (raidPtr->waitShutdown) {
- RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
- return (EBUSY);
- }
- raidPtr->waitShutdown = 1;
- while (raidPtr->nAccOutstanding) {
- RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist));
- }
- RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
-
- /* Wait for any parity re-writes to stop... */
- while (raidPtr->parity_rewrite_in_progress) {
- printf("Waiting for parity re-write to exit...\n");
- tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
- "rfprwshutdown", 0);
- }
-
- raidPtr->valid = 0;
-
- rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
-
- rf_UnconfigureVnodes(raidPtr);
-
- rf_ShutdownList(&raidPtr->shutdownList);
-
- rf_UnconfigureArray();
-
- return (0);
-}
-
-
-#define DO_INIT_CONFIGURE(f) { \
- rc = f (&globalShutdown); \
- if (rc) { \
- RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
- rf_ShutdownList(&globalShutdown); \
- RF_LOCK_MUTEX(configureMutex); \
- configInProgress = 0; \
- configureCount--; \
- RF_UNLOCK_MUTEX(configureMutex); \
- return(rc); \
- } \
-}
-
-#define DO_RAID_FAIL() { \
- rf_UnconfigureVnodes(raidPtr); \
- rf_ShutdownList(&raidPtr->shutdownList); \
- rf_UnconfigureArray(); \
-}
-
-#define DO_RAID_INIT_CONFIGURE(f) { \
- rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \
- if (rc) { \
- RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
- DO_RAID_FAIL(); \
- return(rc); \
- } \
-}
-
-#define DO_RAID_MUTEX(_m_) { \
- rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_)); \
- if (rc) { \
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", \
- __FILE__, __LINE__, rc); \
- DO_RAID_FAIL(); \
- return(rc); \
- } \
-}
-
-#define DO_RAID_COND(_c_) { \
- rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_)); \
- if (rc) { \
- RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", \
- __FILE__, __LINE__, rc); \
- DO_RAID_FAIL(); \
- return(rc); \
- } \
-}
-
-int
-rf_Configure(raidPtr, cfgPtr, ac)
- RF_Raid_t *raidPtr;
- RF_Config_t *cfgPtr;
- RF_AutoConfig_t *ac;
-{
- RF_RowCol_t row, col;
- int i, rc;
-
- /* XXX This check can probably be removed now, since
- RAIDFRAME_CONFIGURE now checks to make sure that the
- RAID set is not already valid
- */
- if (raidPtr->valid) {
- RF_ERRORMSG("RAIDframe configuration not shut down. Aborting configure.\n");
- return (EINVAL);
- }
- RF_LOCK_MUTEX(configureMutex);
- if (configInProgress == 1) {
- RF_UNLOCK_MUTEX(configureMutex);
- return (EBUSY);
- }
- configureCount++;
- if (isconfigged == 0) {
- configInProgress = 1;
- RF_UNLOCK_MUTEX(configureMutex);
- rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- rf_ShutdownList(&globalShutdown);
- return (rc);
- }
- /* initialize globals */
- printf("RAIDFRAME: protectedSectors is %ld\n",
- rf_protectedSectors);
-
- rf_clear_debug_print_buffer();
-
- DO_INIT_CONFIGURE(rf_ConfigureAllocList);
-
- /*
- * Yes, this does make debugging general to the whole
- * system instead of being array specific. Bummer, drag.
- */
- rf_ConfigureDebug(cfgPtr);
- DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
- DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
- DO_INIT_CONFIGURE(rf_ConfigureMapModule);
- DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
- DO_INIT_CONFIGURE(rf_ConfigureCallback);
- DO_INIT_CONFIGURE(rf_ConfigureMemChunk);
- DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
- DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
- DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
- DO_INIT_CONFIGURE(rf_ConfigureMCPair);
- DO_INIT_CONFIGURE(rf_ConfigureDAGs);
- DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
- DO_INIT_CONFIGURE(rf_ConfigureDebugPrint);
- DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
- DO_INIT_CONFIGURE(rf_ConfigureCopyback);
- DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
-
- RF_LOCK_MUTEX(configureMutex);
- isconfigged = 1;
- configInProgress = 0;
- }
- RF_UNLOCK_MUTEX(configureMutex);
-
- DO_RAID_MUTEX(&raidPtr->mutex);
- /* set up the cleanup list. Do this after ConfigureDebug so that
- * value of memDebug will be set */
-
- rf_MakeAllocList(raidPtr->cleanupList);
- if (raidPtr->cleanupList == NULL) {
- DO_RAID_FAIL();
- return (ENOMEM);
- }
- rc = rf_ShutdownCreate(&raidPtr->shutdownList,
- (void (*) (void *)) rf_FreeAllocList,
- raidPtr->cleanupList);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
- __FILE__, __LINE__, rc);
- DO_RAID_FAIL();
- return (rc);
- }
- raidPtr->numRow = cfgPtr->numRow;
- raidPtr->numCol = cfgPtr->numCol;
- raidPtr->numSpare = cfgPtr->numSpare;
-
- /* XXX we don't even pretend to support more than one row in the
- * kernel... */
- if (raidPtr->numRow != 1) {
- RF_ERRORMSG("Only one row supported in kernel.\n");
- DO_RAID_FAIL();
- return (EINVAL);
- }
- RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, sizeof(RF_RowStatus_t),
- (RF_RowStatus_t *), raidPtr->cleanupList);
- if (raidPtr->status == NULL) {
- DO_RAID_FAIL();
- return (ENOMEM);
- }
- RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow,
- sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList);
- if (raidPtr->reconControl == NULL) {
- DO_RAID_FAIL();
- return (ENOMEM);
- }
- for (i = 0; i < raidPtr->numRow; i++) {
- raidPtr->status[i] = rf_rs_optimal;
- raidPtr->reconControl[i] = NULL;
- }
-
- DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
- DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
-
- DO_RAID_COND(&raidPtr->outstandingCond);
-
- raidPtr->nAccOutstanding = 0;
- raidPtr->waitShutdown = 0;
-
- DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
- DO_RAID_COND(&raidPtr->quiescent_cond);
-
- DO_RAID_COND(&raidPtr->waitForReconCond);
-
- DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex);
-
- if (ac!=NULL) {
- /* We have an AutoConfig structure.. Don't do the
- normal disk configuration... call the auto config
- stuff */
- rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
- } else {
- DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
- DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
- }
- /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev
- * no. is set */
- DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
-
- DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
-
- DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
-
- for (row = 0; row < raidPtr->numRow; row++) {
- for (col = 0; col < raidPtr->numCol; col++) {
- /*
- * XXX better distribution
- */
- raidPtr->hist_diskreq[row][col] = 0;
- }
- }
-
- raidPtr->numNewFailures = 0;
- raidPtr->copyback_in_progress = 0;
- raidPtr->parity_rewrite_in_progress = 0;
- raidPtr->recon_in_progress = 0;
- raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
-
- /* autoconfigure and root_partition will actually get filled in
- after the config is done */
- raidPtr->autoconfigure = 0;
- raidPtr->root_partition = 0;
- raidPtr->last_unit = raidPtr->raidid;
- raidPtr->config_order = 0;
-
- if (rf_keepAccTotals) {
- raidPtr->keep_acc_totals = 1;
- }
- rf_StartUserStats(raidPtr);
-
- raidPtr->valid = 1;
- return (0);
-}
-
-static int
-init_rad(desc)
- RF_RaidAccessDesc_t *desc;
-{
- int rc;
-
- rc = rf_mutex_init(&desc->mutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- rc = rf_cond_init(&desc->cond);
- if (rc) {
- RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- rf_mutex_destroy(&desc->mutex);
- return (rc);
- }
- return (0);
-}
-
-static void
-clean_rad(desc)
- RF_RaidAccessDesc_t *desc;
-{
- rf_mutex_destroy(&desc->mutex);
- rf_cond_destroy(&desc->cond);
-}
-
-static void
-rf_ShutdownRDFreeList(ignored)
- void *ignored;
-{
- RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next, (RF_RaidAccessDesc_t *), clean_rad);
-}
-
-static int
-rf_ConfigureRDFreeList(listp)
- RF_ShutdownList_t **listp;
-{
- int rc;
-
- RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD,
- RF_RAD_INC, sizeof(RF_RaidAccessDesc_t));
- if (rf_rad_freelist == NULL) {
- return (ENOMEM);
- }
- rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- rf_ShutdownRDFreeList(NULL);
- return (rc);
- }
- RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next,
- (RF_RaidAccessDesc_t *), init_rad);
- return (0);
-}
-
-RF_RaidAccessDesc_t *
-rf_AllocRaidAccDesc(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_RaidAddr_t raidAddress,
- RF_SectorCount_t numBlocks,
- caddr_t bufPtr,
- void *bp,
- RF_DagHeader_t ** paramDAG,
- RF_AccessStripeMapHeader_t ** paramASM,
- RF_RaidAccessFlags_t flags,
- void (*cbF) (RF_Buf_t),
- void *cbA,
- RF_AccessState_t * states)
-{
- RF_RaidAccessDesc_t *desc;
-
- RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next, (RF_RaidAccessDesc_t *), init_rad);
- if (raidPtr->waitShutdown) {
- /*
- * Actually, we're shutting the array down. Free the desc
- * and return NULL.
- */
- RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
- RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next, clean_rad);
- return (NULL);
- }
- raidPtr->nAccOutstanding++;
- RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
-
- desc->raidPtr = (void *) raidPtr;
- desc->type = type;
- desc->raidAddress = raidAddress;
- desc->numBlocks = numBlocks;
- desc->bufPtr = bufPtr;
- desc->bp = bp;
- desc->paramDAG = paramDAG;
- desc->paramASM = paramASM;
- desc->flags = flags;
- desc->states = states;
- desc->state = 0;
-
- desc->status = 0;
- bzero((char *) &desc->tracerec, sizeof(RF_AccTraceEntry_t));
- desc->callbackFunc = (void (*) (RF_CBParam_t)) cbF; /* XXX */
- desc->callbackArg = cbA;
- desc->next = NULL;
- desc->head = desc;
- desc->numPending = 0;
- desc->cleanupList = NULL;
- rf_MakeAllocList(desc->cleanupList);
- return (desc);
-}
-
-void
-rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc)
-{
- RF_Raid_t *raidPtr = desc->raidPtr;
-
- RF_ASSERT(desc);
-
- rf_FreeAllocList(desc->cleanupList);
- RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, clean_rad);
- raidPtr->nAccOutstanding--;
- if (raidPtr->waitShutdown) {
- RF_SIGNAL_COND(raidPtr->outstandingCond);
- }
- RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
-}
-/*********************************************************************
- * Main routine for performing an access.
- * Accesses are retried until a DAG can not be selected. This occurs
- * when either the DAG library is incomplete or there are too many
- * failures in a parity group.
- ********************************************************************/
-int
-rf_DoAccess(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- int async_flag,
- RF_RaidAddr_t raidAddress,
- RF_SectorCount_t numBlocks,
- caddr_t bufPtr,
- void *bp_in,
- RF_DagHeader_t ** paramDAG,
- RF_AccessStripeMapHeader_t ** paramASM,
- RF_RaidAccessFlags_t flags,
- RF_RaidAccessDesc_t ** paramDesc,
- void (*cbF) (RF_Buf_t),
- void *cbA)
-/*
- * type should be read or write
- * async_flag should be RF_TRUE or RF_FALSE
- * bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel
- * paramDesc is accepted but not referenced in this function.
- *
- * Returns 0 once the access has been handed to rf_ContinueRaidAccess(),
- * EINVAL if the array is not valid, ENOSPC if the request extends past
- * totalSectors, and ENOMEM if no access descriptor could be allocated.
- */
-{
- RF_RaidAccessDesc_t *desc;
- caddr_t lbufPtr = bufPtr;
- RF_Buf_t bp = (RF_Buf_t) bp_in;
-
- /* apply the global sector offset before any range checking */
- raidAddress += rf_raidSectorOffset;
-
- if (!raidPtr->valid) {
- RF_ERRORMSG("RAIDframe driver not successfully configured. Rejecting access.\n");
- IO_BUF_ERR(bp, EINVAL);
- return (EINVAL);
- }
-
- if (rf_accessDebug) {
-
- /* NOTE(review): addresses and counts are truncated to int for printing */
- printf("logBytes is: %d %d %d\n", raidPtr->raidid,
- raidPtr->logBytesPerSector,
- (int) rf_RaidAddressToByte(raidPtr, numBlocks));
- printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid,
- (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
- (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
- (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
- (int) numBlocks,
- (int) rf_RaidAddressToByte(raidPtr, numBlocks),
- (long) bufPtr);
- }
- /* reject requests whose last sector lies beyond the end of the array */
- if (raidAddress + numBlocks > raidPtr->totalSectors) {
-
- printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu\n",
- (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors);
-
- IO_BUF_ERR(bp, ENOSPC);
- return (ENOSPC);
- }
- desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
- numBlocks, lbufPtr, bp, paramDAG, paramASM,
- flags, cbF, cbA, raidPtr->Layout.map->states);
-
- /*
- * NOTE(review): unlike the two error paths above, this one returns
- * without calling IO_BUF_ERR() on bp -- confirm callers handle ENOMEM.
- */
- if (desc == NULL) {
- return (ENOMEM);
- }
- RF_ETIMER_START(desc->tracerec.tot_timer);
-
- desc->async_flag = async_flag;
-
- /* hand the descriptor to the access state machine */
- rf_ContinueRaidAccess(desc);
-
- return (0);
-}
-/*
- * Force the array into reconfigured mode without doing reconstruction.
- *
- * Panics (RF_PANIC) if the layout does not have RF_DISTRIBUTE_SPARE set.
- * Otherwise, under raidPtr->mutex: bumps numFailures, marks disk
- * [row][col] as rf_ds_dist_spared and the row as rf_rs_reconfigured,
- * updates the component labels, and installs the spare table when the
- * layout has RF_BD_DECLUSTERED set.  Always returns 0.
- */
-int
-rf_SetReconfiguredMode(raidPtr, row, col)
- RF_Raid_t *raidPtr;
- int row;
- int col;
-{
- if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
- printf("Can't set reconfigured mode in dedicated-spare array\n");
- RF_PANIC();
- }
- RF_LOCK_MUTEX(raidPtr->mutex);
- raidPtr->numFailures++;
- raidPtr->Disks[row][col].status = rf_ds_dist_spared;
- raidPtr->status[row] = rf_rs_reconfigured;
- rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
- /* install spare table only if declustering + distributed sparing
- * architecture. */
- if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
- rf_InstallSpareTable(raidPtr, row, col);
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- return (0);
-}
-
-extern int fail_row, fail_col, fail_time;
-extern int delayed_recon;
-
-/*
- * Mark disk [frow][fcol] as failed and the row as degraded.
- *
- * The status change and component-label update are done under
- * raidPtr->mutex.  If initRecon is non-zero, reconstruction is started
- * after the mutex has been released; its return value is ignored.
- * Always returns 0.
- */
-int
-rf_FailDisk(
- RF_Raid_t * raidPtr,
- int frow,
- int fcol,
- int initRecon)
-{
- printf("raid%d: Failing disk r%d c%d\n", raidPtr->raidid, frow, fcol);
- RF_LOCK_MUTEX(raidPtr->mutex);
- raidPtr->numFailures++;
- raidPtr->Disks[frow][fcol].status = rf_ds_failed;
- raidPtr->status[frow] = rf_rs_degraded;
- rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- /* reconstruction is kicked off outside the mutex */
- if (initRecon)
- rf_ReconstructFailedDisk(raidPtr, frow, fcol);
- return (0);
-}
-/*
- * Release a thread that is blocked waiting for the array to quiesce.
- * The caller must hold access_suspend_mutex.  The release flag is always
- * set; the condition is signalled only when a waiter has registered
- * itself through waiting_for_quiescence.  reconDesc is not used.
- */
-void
-rf_SignalQuiescenceLock(raidPtr, reconDesc)
-	RF_Raid_t *raidPtr;
-	RF_RaidReconDesc_t *reconDesc;
-{
-	if (rf_quiesceDebug)
-		printf("raid%d: Signalling quiescence lock\n", raidPtr->raidid);
-
-	raidPtr->access_suspend_release = 1;
-
-	/* nobody is waiting: nothing to wake */
-	if (!raidPtr->waiting_for_quiescence)
-		return;
-
-	SIGNAL_QUIESCENT_COND(raidPtr);
-}
-/*
- * Suspend all new requests to the array and wait for quiescence.
- * No effect on accesses that are already in flight, other than waiting
- * for them: if accs_in_flight is non-zero the caller sleeps until
- * access_suspend_release is set (see rf_SignalQuiescenceLock()).
- *
- * Returns the value of waiting_for_quiescence as observed while
- * access_suspend_mutex is still held.  (The flag is cleared after every
- * wakeup, so this is 0 unless the state is changed concurrently.)
- */
-int
-rf_SuspendNewRequestsAndWait(raidPtr)
-	RF_Raid_t *raidPtr;
-{
-	int still_waiting;
-
-	if (rf_quiesceDebug)
-		printf("Suspending new reqs\n");
-
-	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
-	raidPtr->accesses_suspended++;
-	raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
-
-	if (raidPtr->waiting_for_quiescence) {
-		raidPtr->access_suspend_release = 0;
-		/* loop: the wait may return before the release flag is set */
-		while (!raidPtr->access_suspend_release) {
-			printf("Suspending: Waiting for Quiescence\n");
-			WAIT_FOR_QUIESCENCE(raidPtr);
-			raidPtr->waiting_for_quiescence = 0;
-		}
-	}
-	printf("Quiescence reached..\n");
-
-	/*
-	 * Snapshot the result before dropping the mutex; reading the shared
-	 * field after the unlock would race with other suspenders.
-	 */
-	still_waiting = raidPtr->waiting_for_quiescence;
-	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
-	return (still_waiting);
-}
-/*
- * Undo one rf_SuspendNewRequestsAndWait().  When the suspend count drops
- * to zero, every callback queued on quiesce_wait_list is invoked and its
- * descriptor freed.  Callbacks run after access_suspend_mutex has been
- * released.
- */
-void
-rf_ResumeNewRequests(raidPtr)
-	RF_Raid_t *raidPtr;
-{
-	RF_CallbackDesc_t *t, *cb;
-
-	if (rf_quiesceDebug)
-		printf("Resuming new reqs\n");
-
-	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
-	raidPtr->accesses_suspended--;
-	if (raidPtr->accesses_suspended == 0) {
-		/* last resume: take ownership of the whole wait list */
-		cb = raidPtr->quiesce_wait_list;
-		raidPtr->quiesce_wait_list = NULL;
-	} else {
-		/*
-		 * Still suspended by someone else.  Leave the wait list in
-		 * place; clearing it here would drop the queued callbacks
-		 * without ever running or freeing them.
-		 */
-		cb = NULL;
-	}
-	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
-
-	while (cb) {
-		t = cb;
-		cb = cb->next;	/* advance first: t is freed below */
-		(t->callbackFunc) (t->callbackArg);
-		rf_FreeCallbackDesc(t);
-	}
-}
-/*****************************************************************************************
- *
- * debug routines
- *
- ****************************************************************************************/
-
-/*
- * Look up `name' in the NULL-name-terminated rf_debugNames table and
- * store `val' through the matching entry's pointer.  Reports an error
- * if no entry matches.
- */
-static void
-set_debug_option(name, val)
-	char *name;
-	long val;
-{
-	RF_DebugName_t *entry = rf_debugNames;
-
-	/* advance to the matching entry or to the terminator */
-	while (entry->name && strcmp(entry->name, name) != 0)
-		entry++;
-
-	if (!entry->name) {
-		RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
-		return;
-	}
-	*(entry->ptr) = val;
-	printf("[Set debug variable %s to %ld]\n", name, val);
-}
-
-
-/*
- * Parse the "name value" strings in cfgPtr->debugVars and apply each one
- * with set_debug_option().  Values starting with "0x" are parsed as hex,
- * everything else as decimal.  Parsing stops at the first empty entry or
- * after RF_MAXDBGV entries.  Each entry is modified in place: the first
- * whitespace after the name is overwritten with '\0'.
- *
- * would like to use sscanf here, but apparently not available in kernel
- */
-/*ARGSUSED*/
-static void
-rf_ConfigureDebug(cfgPtr)
-	RF_Config_t *cfgPtr;
-{
-	char *val_p, *name_p, *white_p;
-	long val;
-	int i;
-
-	rf_ResetDebugOptions();
-	/*
-	 * Test the index bound first so debugVars[RF_MAXDBGV] is never read
-	 * when every slot is in use.
-	 */
-	for (i = 0; i < RF_MAXDBGV && cfgPtr->debugVars[i][0]; i++) {
-		name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
-		white_p = rf_find_white(name_p);	/* skip to start of 2nd
-							 * word */
-		val_p = rf_find_non_white(white_p);
-		if (*val_p == '0' && *(val_p + 1) == 'x')
-			val = rf_htoi(val_p + 2);
-		else
-			val = rf_atoi(val_p);
-		/* terminate the name only after the value has been located */
-		*white_p = '\0';
-		set_debug_option(name_p, val);
-	}
-}
-/* performance monitoring stuff */
-
-/* Convert a struct timeval (by value) to microseconds as a long. */
-#define TIMEVAL_TO_US(t) (((long) (t).tv_sec) * 1000000L + (long) (t).tv_usec)
-
-#if !defined(_KERNEL) && !defined(SIMULATE)
-
-/*
- * Throughput stats currently only used in user-level RAIDframe
- */
-
-/*
- * Create the managed mutex protecting raidPtr->throughputstats and zero
- * its counters.  Returns 0 on success or the error code from
- * rf_create_managed_mutex().  cfgPtr is not referenced.
- */
-static int
-rf_InitThroughputStats(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- int rc;
-
- /* these used by user-level raidframe only */
- rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- raidPtr->throughputstats.sum_io_us = 0;
- raidPtr->throughputstats.num_ios = 0;
- raidPtr->throughputstats.num_out_ios = 0;
- return (0);
-}
-
-/*
- * Account for one more I/O entering the array.  The start timestamp is
- * taken when the outstanding count goes from zero to one.
- */
-void
-rf_StartThroughputStats(RF_Raid_t * raidPtr)
-{
-	RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
-	raidPtr->throughputstats.num_ios += 1;
-	raidPtr->throughputstats.num_out_ios += 1;
-	if (raidPtr->throughputstats.num_out_ios == 1) {
-		/* array just became busy */
-		RF_GETTIME(raidPtr->throughputstats.start);
-	}
-	RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
-}
-
-/*
- * Account for one I/O leaving the array.  When the outstanding count
- * reaches zero, the time since the matching start timestamp is added to
- * sum_io_us.
- */
-static void
-rf_StopThroughputStats(RF_Raid_t * raidPtr)
-{
-	struct timeval busy;
-
-	RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
-	raidPtr->throughputstats.num_out_ios -= 1;
-	if (raidPtr->throughputstats.num_out_ios != 0) {
-		/* other I/Os still outstanding: busy interval continues */
-		RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
-		return;
-	}
-	RF_GETTIME(raidPtr->throughputstats.stop);
-	RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &busy);
-	raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(busy);
-	RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
-}
-
-/*
- * Print the I/O rate accumulated by the Start/Stop throughput routines.
- * Asserts that no I/O is outstanding; prints nothing if no busy time
- * was recorded.
- */
-static void
-rf_PrintThroughputStats(RF_Raid_t * raidPtr)
-{
-	RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0);
-	if (raidPtr->throughputstats.sum_io_us != 0) {
-		/* sum_io_us is in microseconds; scale to seconds */
-		printf("[Throughput: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios
-		    / (raidPtr->throughputstats.sum_io_us / 1000000.0));
-	}
-}
-#endif /* !KERNEL && !SIMULATE */
-
-/* Reset the user-level statistics counters and record the start time. */
-void
-rf_StartUserStats(RF_Raid_t * raidPtr)
-{
-	raidPtr->userstats.num_sect_moved = 0;
-	raidPtr->userstats.num_ios = 0;
-	raidPtr->userstats.sum_io_us = 0;
-	RF_GETTIME(raidPtr->userstats.start);
-}
-
-/* Record the stop time for the user-level statistics interval. */
-void
-rf_StopUserStats(RF_Raid_t * raidPtr)
-{
- RF_GETTIME(raidPtr->userstats.stop);
-}
-
-/*
- * Fold one completed access into the user-level statistics.
- * No locking is performed here.
- */
-void
-rf_UpdateUserStats(raidPtr, rt, numsect)
-	RF_Raid_t *raidPtr;
-	int rt;			/* response time in microseconds */
-	int numsect;		/* sectors transferred by this access */
-{
-	raidPtr->userstats.num_ios += 1;
-	raidPtr->userstats.num_sect_moved += numsect;
-	raidPtr->userstats.sum_io_us += rt;
-}
-
-/*
- * Print the statistics gathered between rf_StartUserStats() and
- * rf_StopUserStats().
- *
- * The data rate is computed in whole seconds using integer arithmetic,
- * so an interval shorter than one second is reported as 0.0 MB/sec.
- */
-void
-rf_PrintUserStats(RF_Raid_t * raidPtr)
-{
-	long elapsed_us, elapsed_s, mbs, mbs_frac;
-	struct timeval diff;
-
-	RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, &diff);
-	elapsed_us = TIMEVAL_TO_US(diff);
-	elapsed_s = elapsed_us / 1000000;
-
-	/*
-	 * 2000 sectors per megabyte, 1000000 microseconds per second.
-	 * Guard on whole seconds, not on elapsed_us: a sub-second interval
-	 * has elapsed_us != 0 but elapsed_s == 0, which used to divide by
-	 * zero.
-	 */
-	if (elapsed_s != 0) {
-		mbs = (raidPtr->userstats.num_sect_moved / 2000) / elapsed_s;
-		/* this computes only the first digit of the fractional mb/s moved */
-		mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / elapsed_s)
-		    - (mbs * 10);
-	} else {
-		mbs = 0;
-		mbs_frac = 0;
-	}
-
-	printf("Number of I/Os: %ld\n", raidPtr->userstats.num_ios);
-	printf("Elapsed time (us): %ld\n", elapsed_us);
-	printf("User I/Os per second: %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000)));
-	printf("Average user response time: %ld us\n", RF_DB0_CHECK(raidPtr->userstats.sum_io_us, raidPtr->userstats.num_ios));
-	printf("Total sectors moved: %ld\n", raidPtr->userstats.num_sect_moved);
-	printf("Average access size (sect): %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, raidPtr->userstats.num_ios));
-	printf("Achieved data rate: %ld.%ld MB/sec\n", mbs, mbs_frac);
-}
-
-
-/*
- * Format a panic message naming the source line and file into
- * rf_panicbuf.  Despite the name, nothing is printed here.
- * NOTE(review): sprintf() is unbounded; confirm rf_panicbuf is large
- * enough for the longest file name passed in.
- */
-void
-rf_print_panic_message(line,file)
- int line;
- char *file;
-{
- sprintf(rf_panicbuf,"raidframe error at line %d file %s",
- line, file);
-}
-
-/*
- * Format an assertion-failure message (line, file and the text of the
- * failed condition) into rf_panicbuf.  Nothing is printed here.
- * NOTE(review): sprintf() is unbounded; confirm rf_panicbuf is large
- * enough for the longest file name plus condition string.
- */
-void
-rf_print_assert_panic_message(line,file,condition)
- int line;
- char *file;
- char *condition;
-{
- sprintf(rf_panicbuf,
- "raidframe error at line %d file %s (failed asserting %s)\n",
- line, file, condition);
-}
diff --git a/sys/dev/raidframe/rf_driver.h b/sys/dev/raidframe/rf_driver.h
deleted file mode 100644
index 8b156c5..0000000
--- a/sys/dev/raidframe/rf_driver.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_driver.h,v 1.4 2000/02/13 04:53:57 oster Exp $ */
-/*
- * rf_driver.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_DRIVER_H_
-#define _RF__RF_DRIVER_H_
-
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_bsd.h>
-
-#if _KERNEL
-RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex)
-int rf_BootRaidframe(void);
-int rf_UnbootRaidframe(void);
-int rf_Shutdown(RF_Raid_t * raidPtr);
-int rf_Configure(RF_Raid_t * raidPtr, RF_Config_t * cfgPtr,
- RF_AutoConfig_t *ac);
-RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_RaidAddr_t raidAddress,
- RF_SectorCount_t numBlocks,
- caddr_t bufPtr,
- void *bp, RF_DagHeader_t ** paramDAG,
- RF_AccessStripeMapHeader_t ** paramASM,
- RF_RaidAccessFlags_t flags,
- void (*cbF) (RF_Buf_t),
- void *cbA,
- RF_AccessState_t * states);
-void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc);
-int rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag,
- RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
- caddr_t bufPtr, void *bp_in, RF_DagHeader_t ** paramDAG,
- RF_AccessStripeMapHeader_t ** paramASM,
- RF_RaidAccessFlags_t flags,
- RF_RaidAccessDesc_t ** paramDesc,
- void (*cbF) (RF_Buf_t), void *cbA);
-int rf_SetReconfiguredMode(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_RowCol_t col);
-int rf_FailDisk(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol,
- int initRecon);
-void rf_SignalQuiescenceLock(RF_Raid_t * raidPtr,
- RF_RaidReconDesc_t * reconDesc);
-int rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr);
-void rf_ResumeNewRequests(RF_Raid_t * raidPtr);
-void rf_StartThroughputStats(RF_Raid_t * raidPtr);
-void rf_StartUserStats(RF_Raid_t * raidPtr);
-void rf_StopUserStats(RF_Raid_t * raidPtr);
-void rf_UpdateUserStats(RF_Raid_t * raidPtr, int rt, int numsect);
-void rf_PrintUserStats(RF_Raid_t * raidPtr);
-#endif /* _KERNEL */
-#endif /* !_RF__RF_DRIVER_H_ */
diff --git a/sys/dev/raidframe/rf_engine.c b/sys/dev/raidframe/rf_engine.c
deleted file mode 100644
index d49ec20..0000000
--- a/sys/dev/raidframe/rf_engine.c
+++ /dev/null
@@ -1,812 +0,0 @@
-/* $NetBSD: rf_engine.c,v 1.10 2000/08/20 16:51:03 thorpej Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II, Mark Holland, Rachad Youssef
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/****************************************************************************
- * *
- * engine.c -- code for DAG execution engine *
- * *
- * Modified to work as follows (holland): *
- * A user-thread calls into DispatchDAG, which fires off the nodes that *
- * are direct successors to the header node. DispatchDAG then returns, *
- * and the rest of the I/O continues asynchronously. As each node *
- * completes, the node execution function calls FinishNode(). FinishNode *
- * scans the list of successors to the node and increments the antecedent *
- * counts. Each node that becomes enabled is placed on a central node *
- * queue. A dedicated dag-execution thread grabs nodes off of this *
- * queue and fires them. *
- * *
- * NULL nodes are never fired. *
- * *
- * Terminator nodes are never fired, but rather cause the callback *
- * associated with the DAG to be invoked. *
- * *
- * If a node fails, the dag either rolls forward to the completion or *
- * rolls back, undoing previously-completed nodes and fails atomically. *
- * The direction of recovery is determined by the location of the failed *
- * node in the graph. If the failure occurred before the commit node in *
- * the graph, backward recovery is used. Otherwise, forward recovery is *
- * used. *
- * *
- ****************************************************************************/
-
-#include <dev/raidframe/rf_threadstuff.h>
-
-#include <sys/errno.h>
-
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_shutdown.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_kintf.h>
-
-static void DAGExecutionThread(RF_ThreadArg_t arg);
-
-/*
- * Create the managed mutex and condition variable for the node queue of
- * raid structure _r_, registering them on shutdown list _l_.
- * Contains `return' statements: on failure it returns the error code
- * from the *enclosing* function, so it may only be used in a function
- * returning int.
- */
-#define DO_INIT(_l_,_r_) { \
- int _rc; \
- _rc = rf_create_managed_mutex(_l_,&(_r_)->node_queue_mutex); \
- if (_rc) { \
- return(_rc); \
- } \
- _rc = rf_create_managed_cond(_l_,&(_r_)->node_queue_cond); \
- if (_rc) { \
- return(_rc); \
- } \
-}
-
-/*
- * Synchronization primitives for this file.  DO_WAIT should be enclosed
- * in a while loop.
- *
- * DO_LOCK and DO_UNLOCK read and write a variable named `ks', which the
- * calling function must declare (as an int in the callers in this file);
- * it holds the value returned by splbio() until splx() restores it.
- *
- * NOTE(review): DO_WAIT and DO_SIGNAL pass (_r_)->node_queue, not the
- * node_queue_cond created by DO_INIT -- confirm this is what
- * RF_WAIT_COND / RF_BROADCAST_COND expect on this platform.
- */
-
-/*
- * XXX Is this spl-ing really necessary?
- */
-#define DO_LOCK(_r_) \
-do { \
- ks = splbio(); \
- RF_LOCK_MUTEX((_r_)->node_queue_mutex); \
-} while (0)
-
-#define DO_UNLOCK(_r_) \
-do { \
- RF_UNLOCK_MUTEX((_r_)->node_queue_mutex); \
- splx(ks); \
-} while (0)
-
-#define DO_WAIT(_r_) \
- RF_WAIT_COND((_r_)->node_queue, (_r_)->node_queue_mutex)
-
-#define DO_SIGNAL(_r_) \
- RF_BROADCAST_COND((_r_)->node_queue) /* XXX RF_SIGNAL_COND? */
-
-static void rf_ShutdownEngine(void *);
-
-/*
- * Shutdown-list callback: set the engine's shutdown flag and wake the
- * DAG execution thread so that it notices.  `arg' is the RF_Raid_t of
- * the array being shut down and must not be NULL.
- */
-static void
-rf_ShutdownEngine(arg)
-	void *arg;
-{
-	RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
-
-	raidPtr->shutdown_engine = 1;
-	DO_SIGNAL(raidPtr);
-}
-
-/*
- * Set up the DAG execution engine for one array: create the node-queue
- * mutex/condition, start the engine thread, wait for it to report that
- * it is running, and register rf_ShutdownEngine() on the shutdown list.
- *
- * Returns 0 on success, ENOMEM if the thread could not be created, or
- * the error code of whichever setup step failed.  cfgPtr is not
- * referenced.
- */
-int
-rf_ConfigureEngine(
-	RF_ShutdownList_t ** listp,
-	RF_Raid_t * raidPtr,
-	RF_Config_t * cfgPtr)
-{
-	int rc;
-
-	/* NB: DO_INIT returns from this function on failure */
-	DO_INIT(listp, raidPtr);
-
-	raidPtr->node_queue = NULL;
-	raidPtr->dags_in_flight = 0;
-
-	rc = rf_init_managed_threadgroup(listp, &raidPtr->engine_tg);
-	if (rc)
-		return (rc);
-
-	/* create the execution thread for this array */
-	if (rf_engineDebug) {
-		printf("raid%d: Creating engine thread\n", raidPtr->raidid);
-	}
-	if (RF_CREATE_THREAD(raidPtr->engine_thread, DAGExecutionThread, raidPtr,"raid")) {
-		RF_ERRORMSG("RAIDFRAME: Unable to create engine thread\n");
-		return (ENOMEM);
-	}
-	if (rf_engineDebug) {
-		printf("raid%d: Created engine thread\n", raidPtr->raidid);
-	}
-	RF_THREADGROUP_STARTED(&raidPtr->engine_tg);
-	/* XXX something is missing here... */
-#ifdef debug
-	printf("Skipping the WAIT_START!!\n");
-#endif
-#if 1
-	printf("Waiting for DAG engine to start\n");
-	RF_THREADGROUP_WAIT_START(&raidPtr->engine_tg);
-#endif
-	/* engine thread is now running and waiting for work */
-	if (rf_engineDebug) {
-		printf("raid%d: Engine thread running and waiting for events\n", raidPtr->raidid);
-	}
-	rc = rf_ShutdownCreate(listp, rf_ShutdownEngine, raidPtr);
-	if (rc) {
-		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
-		    __LINE__, rc);
-		/*
-		 * Stop the thread we just started.  The callback dereferences
-		 * its argument, so it must be given the array, not NULL.
-		 */
-		rf_ShutdownEngine(raidPtr);
-	}
-	return (rc);
-}
-
-/*
- * Return RF_TRUE if forward execution is completed for a node and all
- * of its succedents, RF_FALSE if any node in the branch is still in the
- * fired state.  Panics on statuses that should never be seen here.
- *
- * NOTE(review): rf_skipped is not listed and therefore takes the
- * default (panic) path -- confirm whether skipped nodes can reach here.
- */
-static int
-BranchDone(RF_DagNode_t * node)
-{
-	int i;
-
-	switch (node->status) {
-	case rf_wait:
-		/* should never be called in this state */
-		RF_PANIC();
-		break;
-	case rf_fired:
-		/* node is currently executing, so we're not done */
-		return (RF_FALSE);
-	case rf_good:
-		for (i = 0; i < node->numSuccedents; i++)	/* for each succedent */
-			if (!BranchDone(node->succedents[i]))	/* recursively check
-								 * branch */
-				return RF_FALSE;
-		return RF_TRUE;	/* node and all succedent branches aren't in
-				 * fired state */
-	case rf_bad:
-		/* succedents can't fire */
-		return (RF_TRUE);
-	case rf_recover:
-		/* should never be called in this state */
-		RF_PANIC();
-		break;
-	case rf_undone:
-	case rf_panic:
-		/* XXX need to fix this case */
-		/* for now, assume that we're done */
-		return (RF_TRUE);
-	default:
-		/* illegal node status */
-		RF_PANIC();
-		break;
-	}
-	/*
-	 * Only reached if RF_PANIC() returns.  Give the function a defined
-	 * result instead of falling off the end of a non-void function.
-	 */
-	return (RF_FALSE);
-}
-
-/*
- * Decide whether a node may be fired given the current DAG direction.
- *
- * Forward (rf_enable / rf_rollForward): the node must be waiting and all
- * of its antecedents must be done.
- * Backward (rf_rollBackward): the node must be good and all of its
- * succedents must be done.
- * Any other DAG status is illegal and panics.
- */
-static int
-NodeReady(RF_DagNode_t * node)
-{
-	/* defined result even if RF_PANIC() below were to return */
-	int ready = RF_FALSE;
-
-	switch (node->dagHdr->status) {
-	case rf_enable:
-	case rf_rollForward:
-		if ((node->status == rf_wait) && (node->numAntecedents == node->numAntDone))
-			ready = RF_TRUE;
-		break;
-	case rf_rollBackward:
-		RF_ASSERT(node->numSuccDone <= node->numSuccedents);
-		RF_ASSERT(node->numSuccFired <= node->numSuccedents);
-		RF_ASSERT(node->numSuccFired <= node->numSuccDone);
-		if ((node->status == rf_good) && (node->numSuccDone == node->numSuccedents))
-			ready = RF_TRUE;
-		break;
-	default:
-		printf("Execution engine found illegal DAG status in NodeReady\n");
-		RF_PANIC();
-		break;
-	}
-
-	return (ready);
-}
-
-
-
-/* user context and dag-exec-thread context:
- * Fire a node.  The node's status field determines which function, do or undo,
- * to be fired.
- * This routine assumes that the node's status field has already been set to
- * "fired" or "recover" to indicate the direction of execution.  Any other
- * status panics.
- */
-static void
-FireNode(RF_DagNode_t * node)
-{
-	switch (node->status) {
-	case rf_fired:
-		/* fire the do function of a node */
-		if (rf_engineDebug) {
-			printf("raid%d: Firing node 0x%lx (%s)\n",
-			    node->dagHdr->raidPtr->raidid,
-			    (unsigned long) node, node->name);
-		}
-		if (node->flags & RF_DAGNODE_FLAG_YIELD) {
-#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL)
-			/* thread_block(); */
-			/* printf("Need to block the thread here...\n"); */
-			/* XXX thread_block is actually mentioned in
-			 * /usr/include/vm/vm_extern.h */
-#else
-			thread_block();
-#endif
-		}
-		(*(node->doFunc)) (node);
-		break;
-	case rf_recover:
-		/* fire the undo function of a node */
-		if (rf_engineDebug) {
-			printf("raid%d: Firing (undo) node 0x%lx (%s)\n",
-			    node->dagHdr->raidPtr->raidid,
-			    (unsigned long) node, node->name);
-		}
-		/*
-		 * The braces are required: when the #if branch below is
-		 * taken it contains only comments, and a brace-less `if'
-		 * would then make the undoFunc call conditional on
-		 * RF_DAGNODE_FLAG_YIELD.
-		 */
-		if (node->flags & RF_DAGNODE_FLAG_YIELD) {
-#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL)
-			/* thread_block(); */
-			/* printf("Need to block the thread here...\n"); */
-			/* XXX thread_block is actually mentioned in
-			 * /usr/include/vm/vm_extern.h */
-#else
-			thread_block();
-#endif
-		}
-		(*(node->undoFunc)) (node);
-		break;
-	default:
-		RF_PANIC();
-		break;
-	}
-}
-
-
-
-/* user context:
- * Attempt to fire each node in a linear array.
- * The entire list is fired atomically.
- *
- * Works in two passes: the first marks every ready node as rf_fired
- * (forward execution) or rf_recover (rollback) and updates the
- * bookkeeping counters; the second calls FireNode() on each marked node.
- * Nodes that are not ready are left untouched.
- */
-static void
-FireNodeArray(
- int numNodes,
- RF_DagNode_t ** nodeList)
-{
- RF_DagStatus_t dstat;
- RF_DagNode_t *node;
- int i, j;
-
- /* first, mark all nodes which are ready to be fired */
- for (i = 0; i < numNodes; i++) {
- node = nodeList[i];
- dstat = node->dagHdr->status;
- RF_ASSERT((node->status == rf_wait) || (node->status == rf_good));
- if (NodeReady(node)) {
- if ((dstat == rf_enable) || (dstat == rf_rollForward)) {
- RF_ASSERT(node->status == rf_wait);
- if (node->commitNode)
- node->dagHdr->numCommits++;
- node->status = rf_fired;
- /* tell each antecedent that one more of its succedents has fired */
- for (j = 0; j < node->numAntecedents; j++)
- node->antecedents[j]->numSuccFired++;
- } else {
- RF_ASSERT(dstat == rf_rollBackward);
- RF_ASSERT(node->status == rf_good);
- RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node
- * per graph */
- node->status = rf_recover;
- }
- }
- }
- /* now, fire the nodes */
- for (i = 0; i < numNodes; i++) {
- if ((nodeList[i]->status == rf_fired) || (nodeList[i]->status == rf_recover))
- FireNode(nodeList[i]);
- }
-}
-
-
-/* user context:
- * Attempt to fire each node in a linked list.
- * The entire list is fired atomically.
- *
- * Same two-pass scheme as FireNodeArray(), but the nodes are chained
- * through their `next' pointers.  A NULL list is a no-op.
- */
-static void
-FireNodeList(RF_DagNode_t * nodeList)
-{
- RF_DagNode_t *node, *next;
- RF_DagStatus_t dstat;
- int j;
-
- if (nodeList) {
- /* first, mark all nodes which are ready to be fired */
- for (node = nodeList; node; node = next) {
- next = node->next;
- dstat = node->dagHdr->status;
- RF_ASSERT((node->status == rf_wait) || (node->status == rf_good));
- if (NodeReady(node)) {
- if ((dstat == rf_enable) || (dstat == rf_rollForward)) {
- RF_ASSERT(node->status == rf_wait);
- if (node->commitNode)
- node->dagHdr->numCommits++;
- node->status = rf_fired;
- /* tell each antecedent that one more of its succedents has fired */
- for (j = 0; j < node->numAntecedents; j++)
- node->antecedents[j]->numSuccFired++;
- } else {
- RF_ASSERT(dstat == rf_rollBackward);
- RF_ASSERT(node->status == rf_good);
- RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node
- * per graph */
- node->status = rf_recover;
- }
- }
- }
- /* now, fire the nodes */
- for (node = nodeList; node; node = next) {
- /* read the link before firing; the node is not touched again afterwards */
- next = node->next;
- if ((node->status == rf_fired) || (node->status == rf_recover))
- FireNode(node);
- }
- }
-}
-/* interrupt context:
- * for each succedent
- * propagate required results from node to succedent
- * increment succedent's numAntDone
- * place newly-enable nodes on node queue for firing
- *
- * To save context switches, we don't place NIL nodes on the node queue,
- * but rather just process them as if they had fired. Note that NIL nodes
- * that are the direct successors of the header will actually get fired by
- * DispatchDAG, which is fine because no context switches are involved.
- *
- * Important: when running at user level, this can be called by any
- * disk thread, and so the increment and check of the antecedent count
- * must be locked. I used the node queue mutex and locked down the
- * entire function, but this is certainly overkill.
- */
-/*
- * Called once a node has finished: credit the node's antecedents with a
- * completed succedent, then work out which neighbouring nodes have become
- * runnable.  Runnable nodes are sorted onto local lists while the node
- * queue lock is held and are processed after it is dropped:
- *   finishlist - null-function nodes, completed via rf_FinishNode()
- *   skiplist   - nodes with a failed true-data antecedent (forward only)
- *   firelist   - nodes fired directly from this context
- *   q / qh     - tail / head of nodes handed to the engine thread's queue
- * `context' selects between firing directly and queueing (see below).
- */
-static void
-PropagateResults(
- RF_DagNode_t * node,
- int context)
-{
- RF_DagNode_t *s, *a;
- RF_Raid_t *raidPtr;
- int i, ks;
- RF_DagNode_t *finishlist = NULL; /* a list of NIL nodes to be
- * finished */
- RF_DagNode_t *skiplist = NULL; /* list of nodes with failed truedata
- * antecedents */
- RF_DagNode_t *firelist = NULL; /* a list of nodes to be fired */
- RF_DagNode_t *q = NULL, *qh = NULL, *next;
- int j, skipNode;
-
- raidPtr = node->dagHdr->raidPtr;
-
- /* `ks' above is used implicitly by DO_LOCK / DO_UNLOCK */
- DO_LOCK(raidPtr);
-
- /* debug - validate fire counts */
- /* (not only debug: this loop also performs the numSuccDone update) */
- for (i = 0; i < node->numAntecedents; i++) {
- a = *(node->antecedents + i);
- RF_ASSERT(a->numSuccFired >= a->numSuccDone);
- RF_ASSERT(a->numSuccFired <= a->numSuccedents);
- a->numSuccDone++;
- }
-
- switch (node->dagHdr->status) {
- case rf_enable:
- case rf_rollForward:
- for (i = 0; i < node->numSuccedents; i++) {
- s = *(node->succedents + i);
- RF_ASSERT(s->status == rf_wait);
- (s->numAntDone)++;
- if (s->numAntDone == s->numAntecedents) {
- /* look for NIL nodes */
- if (s->doFunc == rf_NullNodeFunc) {
- /* don't fire NIL nodes, just process
- * them */
- s->next = finishlist;
- finishlist = s;
- } else {
- /* look to see if the node is to be
- * skipped */
- skipNode = RF_FALSE;
- for (j = 0; j < s->numAntecedents; j++)
- if ((s->antType[j] == rf_trueData) && (s->antecedents[j]->status == rf_bad))
- skipNode = RF_TRUE;
- if (skipNode) {
- /* this node has one or more
- * failed true data
- * dependencies, so skip it */
- s->next = skiplist;
- skiplist = s;
- } else
- /* add s to list of nodes (q)
- * to execute */
- if (context != RF_INTR_CONTEXT) {
- /* we only have to
- * enqueue if we're at
- * intr context */
- s->next = firelist; /* put node on a list to
- * be fired after we
- * unlock */
- firelist = s;
- } else { /* enqueue the node for
- * the dag exec thread
- * to fire */
- RF_ASSERT(NodeReady(s));
- if (q) {
- q->next = s;
- q = s;
- } else {
- qh = q = s;
- qh->next = NULL;
- }
- }
- }
- }
- }
-
- if (q) {
- /* xfer our local list of nodes to the node queue */
- /* (local list is spliced in ahead of whatever is already queued) */
- q->next = raidPtr->node_queue;
- raidPtr->node_queue = qh;
- DO_SIGNAL(raidPtr);
- }
- DO_UNLOCK(raidPtr);
-
- /* skipped nodes are accounted for as if fired, then finished */
- for (; skiplist; skiplist = next) {
- next = skiplist->next;
- skiplist->status = rf_skipped;
- for (i = 0; i < skiplist->numAntecedents; i++) {
- skiplist->antecedents[i]->numSuccFired++;
- }
- if (skiplist->commitNode) {
- skiplist->dagHdr->numCommits++;
- }
- rf_FinishNode(skiplist, context);
- }
- for (; finishlist; finishlist = next) {
- /* NIL nodes: no need to fire them */
- next = finishlist->next;
- finishlist->status = rf_good;
- for (i = 0; i < finishlist->numAntecedents; i++) {
- finishlist->antecedents[i]->numSuccFired++;
- }
- if (finishlist->commitNode)
- finishlist->dagHdr->numCommits++;
- /*
- * Okay, here we're calling rf_FinishNode() on nodes that
- * have the null function as their work proc. Such a node
- * could be the terminal node in a DAG. If so, it will
- * cause the DAG to complete, which will in turn free
- * memory used by the DAG, which includes the node in
- * question. Thus, we must avoid referencing the node
- * at all after calling rf_FinishNode() on it.
- */
- rf_FinishNode(finishlist, context); /* recursive call */
- }
- /* fire all nodes in firelist */
- FireNodeList(firelist);
- break;
-
- case rf_rollBackward:
- /* rollback walks towards the antecedents instead of the succedents */
- for (i = 0; i < node->numAntecedents; i++) {
- a = *(node->antecedents + i);
- RF_ASSERT(a->status == rf_good);
- RF_ASSERT(a->numSuccDone <= a->numSuccedents);
- RF_ASSERT(a->numSuccDone <= a->numSuccFired);
-
- if (a->numSuccDone == a->numSuccFired) {
- if (a->undoFunc == rf_NullNodeFunc) {
- /* don't fire NIL nodes, just process
- * them */
- a->next = finishlist;
- finishlist = a;
- } else {
- if (context != RF_INTR_CONTEXT) {
- /* we only have to enqueue if
- * we're at intr context */
- a->next = firelist; /* put node on a list to
- * be fired after we
- * unlock */
- firelist = a;
- } else { /* enqueue the node for
- * the dag exec thread
- * to fire */
- RF_ASSERT(NodeReady(a));
- if (q) {
- q->next = a;
- q = a;
- } else {
- qh = q = a;
- qh->next = NULL;
- }
- }
- }
- }
- }
- if (q) {
- /* xfer our local list of nodes to the node queue */
- q->next = raidPtr->node_queue;
- raidPtr->node_queue = qh;
- DO_SIGNAL(raidPtr);
- }
- DO_UNLOCK(raidPtr);
- for (; finishlist; finishlist = next) { /* NIL nodes: no need to
- * fire them */
- next = finishlist->next;
- finishlist->status = rf_good;
- /*
- * Okay, here we're calling rf_FinishNode() on nodes that
- * have the null function as their work proc. Such a node
- * could be the first node in a DAG. If so, it will
- * cause the DAG to complete, which will in turn free
- * memory used by the DAG, which includes the node in
- * question. Thus, we must avoid referencing the node
- * at all after calling rf_FinishNode() on it.
- */
- rf_FinishNode(finishlist, context); /* recursive call */
- }
- /* fire all nodes in firelist */
- FireNodeList(firelist);
-
- break;
- default:
- /* NOTE(review): this path panics with the node queue lock still held */
- printf("Engine found illegal DAG status in PropagateResults()\n");
- RF_PANIC();
- break;
- }
-}
-
-
-
-/*
- * Process a fired node which has completed.
- *
- * A failed (rf_bad) node decides the DAG's recovery direction: roll
- * forward if a commit node has already fired or the DAG has no commit
- * nodes, roll backward otherwise.  In every non-panicking case the
- * node's results are then propagated to its neighbours.
- */
-static void
-ProcessNode(
-	RF_DagNode_t * node,
-	int context)
-{
-	RF_Raid_t *raidPtr;
-
-	raidPtr = node->dagHdr->raidPtr;
-
-	switch (node->status) {
-	case rf_good:
-		/* normal case, don't need to do anything */
-		break;
-	case rf_bad:
-		if ((node->dagHdr->numCommits > 0) || (node->dagHdr->numCommitNodes == 0)) {
-			node->dagHdr->status = rf_rollForward;	/* crossed commit
-								 * barrier */
-			/* "|| 1": the failure is reported unconditionally */
-			if (rf_engineDebug || 1) {
-				printf("raid%d: node (%s) returned fail, rolling forward\n", raidPtr->raidid, node->name);
-			}
-		} else {
-			node->dagHdr->status = rf_rollBackward;	/* never reached commit
-								 * barrier */
-			if (rf_engineDebug || 1) {
-				printf("raid%d: node (%s) returned fail, rolling backward\n", raidPtr->raidid, node->name);
-			}
-		}
-		break;
-	case rf_undone:
-		/* normal rollBackward case, don't need to do anything */
-		break;
-	case rf_panic:
-		/* an undo node failed!!! */
-		printf("UNDO of a node failed!!!\n");
-		break;
-	default:
-		printf("node finished execution with an illegal status!!!\n");
-		RF_PANIC();
-		break;
-	}
-
-	/* enqueue node's succedents (antecedents if rollBackward) for
-	 * execution */
-	PropagateResults(node, context);
-}
-
-
-
-/* user context or dag-exec-thread context:
- * First step in post-processing a newly-completed node.  Every node
- * execution function calls this to count the node as completed and to
- * fire off any neighbours that have become enabled as a result.
- *
- * Always returns RF_FALSE; as far as I can tell the return value is not
- * used -wvcii
- */
-int
-rf_FinishNode(
-	RF_DagNode_t * node,
-	int context)
-{
-	node->dagHdr->numNodesCompleted += 1;
-	ProcessNode(node, context);
-
-	return (RF_FALSE);
-}
-
-
-/* user context:
- * submit dag for execution, return non-zero if we have to wait for completion.
- * if and only if we return non-zero, we'll cause cbFunc to get invoked with
- * cbArg when the DAG has completed.
- *
- * for now we always return 1. If the DAG does not cause any I/O, then the callback
- * may get invoked before DispatchDAG returns. There's code in state 5 of ContinueRaidAccess
- * to handle this.
- *
- * All we do here is fire the direct successors of the header node. The
- * DAG execution thread does the rest of the dag processing.
- */
-int
-rf_DispatchDAG(
- RF_DagHeader_t * dag,
- void (*cbFunc) (void *),
- void *cbArg)
-{
- RF_Raid_t *raidPtr;
-
- raidPtr = dag->raidPtr;
- /* start the per-DAG timer only when a trace record is attached */
- if (dag->tracerec) {
- RF_ETIMER_START(dag->tracerec->timer);
- }
- /* a non-zero result from rf_ValidateDAG() is fatal */
- if (rf_engineDebug || rf_validateDAGDebug) {
- if (rf_ValidateDAG(dag))
- RF_PANIC();
- }
- if (rf_engineDebug) {
- printf("raid%d: Entering DispatchDAG\n", raidPtr->raidid);
- }
- raidPtr->dags_in_flight++; /* debug only: blow off proper
- * locking */
- /* record the completion callback and reset the DAG's run state */
- dag->cbFunc = cbFunc;
- dag->cbArg = cbArg;
- dag->numNodesCompleted = 0;
- dag->status = rf_enable;
- /* fire the header's direct succedents */
- FireNodeArray(dag->numSuccedents, dag->succedents);
- return (1);
-}
-/* dedicated kernel thread:
- * the thread that handles all DAG node firing.
- * To minimize locking and unlocking, we grab a copy of the entire node queue and then set the
- * node queue to NULL before doing any firing of nodes. This way we only have to release the
- * lock once. Of course, it's probably rare that there's more than one node in the queue at
- * any one time, but it sometimes happens.
- *
- * In the kernel, this thread runs at spl0 and is not swappable. I copied these
- * characteristics from the aio_completion_thread.
- */
-
-/*
- * Body of the per-array engine thread.  `arg' is the RF_Raid_t of the
- * array.  Loops until raidPtr->shutdown_engine is set: drains the node
- * queue, runs the completion callback of every DAG whose end node was
- * queued, fires the remaining nodes, then waits for more work.
- */
-static void
-DAGExecutionThread(RF_ThreadArg_t arg)
-{
- RF_DagNode_t *nd, *local_nq, *term_nq, *fire_nq;
- RF_Raid_t *raidPtr;
- int ks;
-
- raidPtr = (RF_Raid_t *) arg;
-
- if (rf_engineDebug) {
- printf("raid%d: Engine thread is running\n", raidPtr->raidid);
- }
-
- /*
- * NOTE(review): no matching unlock of Giant is visible in this
- * function -- confirm that RF_THREAD_EXIT (or the wait macro) deals
- * with it.
- */
- mtx_lock(&Giant);
-
- RF_THREADGROUP_RUNNING(&raidPtr->engine_tg);
-
- /* `ks' above is used implicitly by DO_LOCK / DO_UNLOCK */
- DO_LOCK(raidPtr);
- while (!raidPtr->shutdown_engine) {
-
- while (raidPtr->node_queue != NULL) {
- /* take the whole queue, then release the lock while working on it */
- local_nq = raidPtr->node_queue;
- fire_nq = NULL;
- term_nq = NULL;
- raidPtr->node_queue = NULL;
- DO_UNLOCK(raidPtr);
-
- /* first, strip out the terminal nodes */
- while (local_nq) {
- nd = local_nq;
- local_nq = local_nq->next;
- switch (nd->dagHdr->status) {
- case rf_enable:
- case rf_rollForward:
- if (nd->numSuccedents == 0) {
- /* end of the dag, add to
- * callback list */
- nd->next = term_nq;
- term_nq = nd;
- } else {
- /* not the end, add to the
- * fire queue */
- nd->next = fire_nq;
- fire_nq = nd;
- }
- break;
- case rf_rollBackward:
- /* when rolling back, the "end" is a node with no antecedents */
- if (nd->numAntecedents == 0) {
- /* end of the dag, add to the
- * callback list */
- nd->next = term_nq;
- term_nq = nd;
- } else {
- /* not the end, add to the
- * fire queue */
- nd->next = fire_nq;
- fire_nq = nd;
- }
- break;
- default:
- RF_PANIC();
- break;
- }
- }
-
- /* execute callback of dags which have reached the
- * terminal node */
- while (term_nq) {
- nd = term_nq;
- /* advance before the callback; nd is not referenced after it */
- term_nq = term_nq->next;
- nd->next = NULL;
- (nd->dagHdr->cbFunc) (nd->dagHdr->cbArg);
- raidPtr->dags_in_flight--; /* debug only */
- }
-
- /* fire remaining nodes */
- FireNodeList(fire_nq);
-
- DO_LOCK(raidPtr);
- }
- /* sleep (lock held on entry) until there is work or a shutdown request */
- while (!raidPtr->shutdown_engine && raidPtr->node_queue == NULL)
- DO_WAIT(raidPtr);
- }
- DO_UNLOCK(raidPtr);
-
- RF_THREADGROUP_DONE(&raidPtr->engine_tg);
-
- RF_THREAD_EXIT(0);
-}
diff --git a/sys/dev/raidframe/rf_engine.h b/sys/dev/raidframe/rf_engine.h
deleted file mode 100644
index c758c05..0000000
--- a/sys/dev/raidframe/rf_engine.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_engine.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II, Mark Holland, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/**********************************************************
- * *
- * engine.h -- header file for execution engine functions *
- * *
- **********************************************************/
-
-#ifndef _RF__RF_ENGINE_H_
-#define _RF__RF_ENGINE_H_
-
-/* engine configuration entry point */
-int	rf_ConfigureEngine(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
-	    RF_Config_t *cfgPtr);
-
-/* return finished node to engine */
-int	rf_FinishNode(RF_DagNode_t *node, int context);
-
-/* execute dag; cbFunc(cbArg) is the completion callback */
-int	rf_DispatchDAG(RF_DagHeader_t *dag, void (*cbFunc) (void *),
-	    void *cbArg);
-
-#endif /* !_RF__RF_ENGINE_H_ */
diff --git a/sys/dev/raidframe/rf_etimer.h b/sys/dev/raidframe/rf_etimer.h
deleted file mode 100644
index e66e01b..0000000
--- a/sys/dev/raidframe/rf_etimer.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_etimer.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_TIMER_H_
-#define _RF__RF_TIMER_H_
-
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_utils.h>
-
-#include <sys/time.h>
-
-/*
- * Elapsed-time stopwatch.  RF_ETIMER_START fills in `st', RF_ETIMER_STOP
- * fills in `et', and RF_ETIMER_EVAL stores the result of RF_TIMEVAL_DIFF
- * on the two in `diff'.
- */
-struct RF_Etimer_s {
- struct timeval st;
- struct timeval et;
- struct timeval diff;
-};
-
-#if defined(_KERNEL)
-#include <sys/kernel.h>
-
-/*
- * RF_ETIMER_START(t): zero the whole timer, then record the start time in
- * t.st at splclock().  NetBSD copies mono_time; FreeBSD calls
- * getmicrouptime().  The macro expands to a bare { } block, not a
- * do { } while (0) statement.
- */
-#if defined(__NetBSD__)
-#define RF_ETIMER_START(_t_) \
- { \
- int s; \
- bzero(&(_t_), sizeof (_t_)); \
- s = splclock(); \
- (_t_).st = mono_time; \
- splx(s); \
- }
-#elif defined(__FreeBSD__)
-#define RF_ETIMER_START(_t_) \
- { \
- int s; \
- bzero(&(_t_), sizeof (_t_)); \
- s = splclock(); \
- getmicrouptime(&(_t_).st); \
- splx(s); \
- }
-#endif
-
-/*
- * RF_ETIMER_STOP(t): record the stop time in t.et, using the same clock
- * source as RF_ETIMER_START.  Does not touch t.st or t.diff.
- */
-#if defined(__NetBSD__)
-#define RF_ETIMER_STOP(_t_) \
- { \
- int s; \
- s = splclock(); \
- (_t_).et = mono_time; \
- splx(s); \
- }
-#elif defined(__FreeBSD__)
-#define RF_ETIMER_STOP(_t_) \
- { \
- int s; \
- s = splclock(); \
- getmicrouptime(&(_t_).et); \
- splx(s); \
- }
-#endif
-
-/*
- * RF_ETIMER_EVAL(t): hand st, et and diff to RF_TIMEVAL_DIFF.
- * NOTE(review): no semicolon follows RF_TIMEVAL_DIFF here, so that macro
- * presumably expands to a complete statement or block -- confirm in
- * rf_utils.h.
- */
-#define RF_ETIMER_EVAL(_t_) \
- { \
- RF_TIMEVAL_DIFF(&(_t_).st, &(_t_).et, &(_t_).diff) \
- }
-
-/* Value of t.diff via RF_TIMEVAL_TO_US; the _MS form divides that by 1000. */
-#define RF_ETIMER_VAL_US(_t_) (RF_TIMEVAL_TO_US((_t_).diff))
-#define RF_ETIMER_VAL_MS(_t_) (RF_TIMEVAL_TO_US((_t_).diff)/1000)
-
-#endif /* _KERNEL */
-
-#endif /* !_RF__RF_TIMER_H_ */
diff --git a/sys/dev/raidframe/rf_evenodd.c b/sys/dev/raidframe/rf_evenodd.c
deleted file mode 100644
index 334ba0b..0000000
--- a/sys/dev/raidframe/rf_evenodd.c
+++ /dev/null
@@ -1,559 +0,0 @@
-/* $NetBSD: rf_evenodd.c,v 1.4 2000/01/07 03:40:59 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Chang-Ming Wu
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************************
- *
- * rf_evenodd.c -- implements EVENODD array architecture
- *
- ****************************************************************************************/
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_EVENODD > 0
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_evenodd.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_pq.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_evenodd.h>
-#include <dev/raidframe/rf_evenodd_dagfuncs.h>
-#include <dev/raidframe/rf_evenodd_dags.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_kintf.h>
-
-/* Layout-private state, stored in Layout.layoutSpecificInfo by rf_ConfigureEvenOdd. */
-typedef struct RF_EvenOddConfigInfo_s {
- RF_RowCol_t **stripeIdentifier; /* numCol x numCol table of disk columns;
- * filled in at config time & used by
- * IdentifyStripe */
-} RF_EvenOddConfigInfo_t;
-
-/*
- * Configure the layout parameters of an array for the EVENODD architecture.
- *
- * Allocates the layout-private info (tracked on raidPtr->cleanupList), builds
- * the numCol x numCol stripe identifier table used by
- * rf_IdentifyStripeEvenOdd, and fills in the stripe geometry: two parity
- * columns, numCol - 2 data columns.
- *
- * listp and cfgPtr are not used by this function.
- *
- * Returns 0 on success, ENOMEM if an allocation fails, or EINVAL if the
- * number of columns does not suit the compiled-in RF_EO_MATRIX_DIM.
- */
-int
-rf_ConfigureEvenOdd(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
-    RF_Config_t *cfgPtr)
-{
-	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
-	RF_EvenOddConfigInfo_t *info;
-	RF_RowCol_t i, j, startdisk;
-
-	RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList);
-	if (info == NULL)
-		return (ENOMEM);
-	layoutPtr->layoutSpecificInfo = (void *) info;
-
-	RF_ASSERT(raidPtr->numRow == 1);
-
-	info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
-	if (info->stripeIdentifier == NULL)
-		return (ENOMEM);
-
-	/*
-	 * Row i lists the disk columns of stripe i in order.  Each
-	 * successive stripe starts two columns earlier (mod numCol), which
-	 * rotates the two redundancy units across the disks.
-	 */
-	startdisk = 0;
-	for (i = 0; i < raidPtr->numCol; i++) {
-		for (j = 0; j < raidPtr->numCol; j++) {
-			info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
-		}
-		if ((startdisk -= 2) < 0)
-			startdisk += raidPtr->numCol;
-	}
-
-	/* fill in the remaining layout parameters */
-	layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
-	layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
-	layoutPtr->numDataCol = raidPtr->numCol - 2;	/* ORIG:
-							 * layoutPtr->numDataCol
-							 * = raidPtr->numCol-1; */
-#if RF_EO_MATRIX_DIM > 17
-	if (raidPtr->numCol <= 17) {
-		printf("Number of stripe units in a parity stripe is smaller than 17. Please\n");
-		printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
-		printf("be 17 to increase performance. \n");
-		return (EINVAL);
-	}
-#elif RF_EO_MATRIX_DIM == 17
-	if (raidPtr->numCol > 17) {
-		printf("Number of stripe units in a parity stripe is bigger than 17. Please\n");
-		printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
-		printf("be 257 for encoding and decoding functions to work. \n");
-		return (EINVAL);
-	}
-#endif
-	layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
-	layoutPtr->numParityCol = 2;
-	layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
-	raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
-
-	raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
-
-	return (0);
-}
-
-/* Default floating reconstruction buffer count for EVENODD; raidPtr is ignored. */
-int
-rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr)
-{
-	const int defaultNumFloatingReconBuffers = 20;
-
-	return (defaultNumFloatingReconBuffers);
-}
-
-/* Default head separation limit for EVENODD; raidPtr is ignored. */
-RF_HeadSepLimit_t
-rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr)
-{
-	const RF_HeadSepLimit_t defaultHeadSepLimit = 10;
-
-	return (defaultHeadSepLimit);
-}
-
-/*
- * Report which disks hold the stripe containing `addr': *diskids is set to
- * the stripe's row of the table built at configuration time, and *outRow is
- * always 0.
- */
-void
-rf_IdentifyStripeEvenOdd(
-    RF_Raid_t * raidPtr,
-    RF_RaidAddr_t addr,
-    RF_RowCol_t ** diskids,
-    RF_RowCol_t * outRow)
-{
-	RF_EvenOddConfigInfo_t *cfg;
-	RF_StripeNum_t sid;
-
-	cfg = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-	sid = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
-
-	/* the table has numCol rows, so stripes wrap around modulo numCol */
-	*diskids = cfg->stripeIdentifier[sid % raidPtr->numCol];
-	*outRow = 0;
-}
-/* The layout of stripe units on the disks is:   c0  c1  c2  c3  c4
-
-                                                   0   1   2   E   P
-                                                   5   E   P   3   4
-                                                   P   6   7   8   E
-                                                  10  11   E   P   9
-                                                   E   P  12  13  14
-                                                  ....
-
-  We use MapSectorRAID5 to map the data units, because that routine can be shown to produce exactly
-  the data stripe unit layout shown above even though there are now two redundancy units per stripe.
-  For E and P we use rf_MapEEvenOdd and rf_MapParityEvenOdd, which map differently from RAID-5.
-*/
-
-
-/*
- * Map a RAID sector address to the location of its stripe's parity (P) unit.
- * The P column is two past the column index derived from the last data
- * stripe unit of the stripe, modulo numCol.  *row is always 0 and `remap' is
- * not used.
- */
-void
-rf_MapParityEvenOdd(
-    RF_Raid_t * raidPtr,
-    RF_RaidAddr_t raidSector,
-    RF_RowCol_t * row,
-    RF_RowCol_t * col,
-    RF_SectorNum_t * diskSector,
-    int remap)
-{
-	RF_RaidLayout_t *lp = &raidPtr->Layout;
-	RF_StripeNum_t SUID = raidSector / lp->sectorsPerStripeUnit;
-	RF_StripeNum_t stripe = SUID / lp->numDataCol;
-	/* highest data stripe unit ID that belongs to this stripe */
-	RF_StripeNum_t lastSUID = (stripe + 1) * lp->numDataCol - 1;
-
-	*row = 0;
-	*col = (lastSUID + 2) % raidPtr->numCol;
-	/* one stripe unit per stripe on each disk, plus the offset within the unit */
-	*diskSector = stripe * lp->sectorsPerStripeUnit +
-	    (raidSector % lp->sectorsPerStripeUnit);
-}
-
-/*
- * Map a RAID sector address to the location of its stripe's second
- * redundancy (E) unit.  The E column is one past the column index derived
- * from the last data stripe unit of the stripe, modulo numCol.  *row is
- * always 0 and `remap' is not used.
- */
-void
-rf_MapEEvenOdd(
-    RF_Raid_t * raidPtr,
-    RF_RaidAddr_t raidSector,
-    RF_RowCol_t * row,
-    RF_RowCol_t * col,
-    RF_SectorNum_t * diskSector,
-    int remap)
-{
-	RF_RaidLayout_t *lp = &raidPtr->Layout;
-	RF_StripeNum_t SUID = raidSector / lp->sectorsPerStripeUnit;
-	RF_StripeNum_t stripe = SUID / lp->numDataCol;
-	/* highest data stripe unit ID that belongs to this stripe */
-	RF_StripeNum_t lastSUID = (stripe + 1) * lp->numDataCol - 1;
-
-	*row = 0;
-	*col = (lastSUID + 1) % raidPtr->numCol;
-	/* one stripe unit per stripe on each disk, plus the offset within the unit */
-	*diskSector = stripe * lp->sectorsPerStripeUnit +
-	    (raidSector % lp->sectorsPerStripeUnit);
-}
-
-/*
- * Choose the DAG creation function for one access to an EVENODD stripe.
- *
- * The choice depends on the I/O type and on the failures recorded in asmap:
- * ndfail counts failed data units, npfail counts failed P plus failed E/Q
- * units.  The selected function is stored in *createFunc; NULL is stored
- * when the combination is not supported (including more than two failures).
- *
- * As used below, the three digits in the rf_EO_xyz_* names are the number of
- * failed data, P and E units that the DAG copes with (e.g. 110 = one data
- * unit and P lost, 001 = only E lost).
- */
-void
-rf_EODagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr * createFunc)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- unsigned ndfail = asmap->numDataFailed;
- unsigned npfail = asmap->numParityFailed + asmap->numQFailed;
- unsigned ntfail = npfail + ndfail;
-
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
- if (ntfail > 2) {
- RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
- /* *infoFunc = */ *createFunc = NULL;
- return;
- }
- /* ok, we can do this I/O */
- if (type == RF_IO_TYPE_READ) {
- /* reads only care about lost data units; lost redundancy matters
- * only when it is needed to rebuild a lost data unit */
- switch (ndfail) {
- case 0:
- /* fault free read */
- *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
- break;
- case 1:
- /* lost a single data unit */
- /* two cases: (1) parity is not lost. do a normal raid
- * 5 reconstruct read. (2) parity is lost. do a
- * reconstruct read using "e". */
- if (ntfail == 2) { /* also lost redundancy */
- if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
- *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateReadDAG;
- } else {
- /* P and E are ok. But is there a failure in
- * some unaccessed data unit? */
- if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
- *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateReadDAG;
- }
- break;
- case 2:
- /* reading two failed data units in one access is not supported */
- /* *createFunc = rf_EO_200_CreateReadDAG; */
- *createFunc = NULL;
- break;
- }
- return;
- }
- /* a write */
- switch (ntfail) {
- case 0: /* fault free */
- /* small write when at most half of the data columns are touched,
- * when P or E maps to more than one PDA, when the stripe has
- * failures, or when large writes are suppressed */
- if (rf_suppressLocksAndLargeWrites ||
- (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
- (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
-
- *createFunc = (RF_VoidFuncPtr) rf_EOCreateSmallWriteDAG;
- } else {
- *createFunc = (RF_VoidFuncPtr) rf_EOCreateLargeWriteDAG;
- }
- break;
-
- case 1: /* single disk fault */
- if (npfail == 1) {
- RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
- if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
- * normal mode raid5
- * write. */
- if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
- || (asmap->parityInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
- *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateSmallWriteDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateLargeWriteDAG;
- } else {/* parity died, small write only updating Q */
- if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
- || (asmap->qInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
- *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateSmallWriteDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateLargeWriteDAG;
- }
- } else { /* data missing. Do a P reconstruct write if
- * only a single data unit is lost in the
- * stripe, otherwise a reconstruct write which
- * employs both P and E units. */
- if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) {
- if (asmap->numStripeUnitsAccessed == 1)
- *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateWriteDAG;
- else
- *createFunc = NULL; /* No direct support for
- * this case now, like
- * that in Raid-5 */
- } else {
- if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
- *createFunc = NULL; /* No direct support for
- * this case now, like
- * that in Raid-5 */
- else
- *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateWriteDAG;
- }
- }
- break;
-
- case 2: /* two disk faults */
- switch (npfail) {
- case 2: /* both p and q dead */
- *createFunc = (RF_VoidFuncPtr) rf_EO_011_CreateWriteDAG;
- break;
- case 1: /* either p or q and dead data */
- RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
- RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
- if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) {
- if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
- *createFunc = NULL; /* In both PQ and
- * EvenOdd, no direct
- * support for this case
- * now, like that in
- * Raid-5 */
- else
- *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateWriteDAG;
- } else {
- if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
- *createFunc = NULL; /* No direct support for
- * this case, like that
- * in Raid-5 */
- else
- *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateWriteDAG;
- }
- break;
- case 0: /* double data loss */
- /* if(asmap->failedPDAs[0]->numSector +
- * asmap->failedPDAs[1]->numSector == 2 *
- * layoutPtr->sectorsPerStripeUnit ) createFunc =
- * rf_EOCreateLargeWriteDAG; else */
- *createFunc = NULL; /* currently, in Evenodd, No
- * support for simultaneous
- * access of both failed SUs */
- break;
- }
- break;
-
- default: /* more than 2 disk faults */
- /* not expected to be reached: ntfail > 2 already returned above */
- *createFunc = NULL;
- RF_PANIC();
- }
- return;
-}
-
-
-/*
- * Verify, and optionally rewrite, the two redundancy units (parity "P" and
- * the second EVENODD unit "E") of the stripe containing raidAddr.
- *
- * The data, P and E units are read with a simple DAG, restricted to the
- * sector range described by parityPDA.  P and E are then recomputed from
- * the data (rf_bxor / rf_e_encToBuf) and compared byte by byte with what
- * was read.  When correct_it is non-zero, each unit that differs is written
- * back with its recomputed contents.
- *
- * Returns:
- *   RF_PARITY_OKAY              both units match, or a PDA had to be
- *                               redirected so nothing could be checked
- *   RF_PARITY_COULD_NOT_VERIFY  the read DAG failed
- *   RF_PARITY_BAD               a mismatch was found and not repaired
- *   RF_PARITY_CORRECTED         every mismatching unit was rewritten
- *   RF_PARITY_COULD_NOT_CORRECT a corrective write failed
- */
-int
-rf_VerifyParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
-    RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags)
-{
-	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-	RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
-	RF_SectorCount_t numsector = parityPDA->numSector;
-	int numbytes = rf_RaidAddressToByte(raidPtr, numsector);
-	int bytesPerStripe = numbytes * layoutPtr->numDataCol;
-	RF_DagHeader_t *rd_dag_h, *wr_dag_h;	/* read, write dag */
-	RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock;
-	RF_AccessStripeMapHeader_t *asm_h;
-	RF_AccessStripeMap_t *asmap;
-	RF_AllocListElem_t *alloclist;
-	RF_PhysDiskAddr_t *pda;
-	char *pbuf, *buf, *end_p, *p;
-	char *redundantbuf2;
-	int redundantTwoErr = 0, redundantOneErr = 0;
-	int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE,
-	    parity_corrected = RF_FALSE, red2_corrected = RF_FALSE;
-	int i, retcode;
-	RF_ReconUnitNum_t which_ru;
-	RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
-	int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
-	RF_AccTraceEntry_t tracerec;
-	RF_MCPair_t *mcpair;
-
-	retcode = RF_PARITY_OKAY;
-
-	/*
-	 * buf receives the whole stripe: numDataCol data units, then P,
-	 * then E.  pbuf and redundantbuf2 accumulate the recomputed P and E
-	 * and therefore must start out zeroed.
-	 */
-	mcpair = rf_AllocMCPair();
-	rf_MakeAllocList(alloclist);
-	RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist);
-	RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist);	/* use calloc to make
-									 * sure buffer is zeroed */
-	end_p = buf + bytesPerStripe;
-	RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist);	/* use calloc to make
-										 * sure buffer is zeroed */
-
-	rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc,
-	    "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY);
-	blockNode = rd_dag_h->succedents[0];
-	unblockNode = blockNode->succedents[0]->succedents[0];
-
-	/* map the stripe and fill in the PDAs in the dag */
-	asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
-	asmap = asm_h->stripeMap;
-
-	for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
-		RF_ASSERT(pda);
-		rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
-		RF_ASSERT(pda->numSector != 0);
-		if (rf_TryToRedirectPDA(raidPtr, pda, 0))
-			goto out;	/* no way to verify parity if disk is
-					 * dead. return w/ good status */
-		blockNode->succedents[i]->params[0].p = pda;
-		blockNode->succedents[i]->params[2].v = psID;
-		blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-	}
-
-	RF_ASSERT(!asmap->parityInfo->next);
-	rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1);
-	RF_ASSERT(asmap->parityInfo->numSector != 0);
-	if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1))
-		goto out;
-	blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo;
-
-	RF_ASSERT(!asmap->qInfo->next);
-	rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1);
-	RF_ASSERT(asmap->qInfo->numSector != 0);
-	if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1))
-		goto out;
-	/* if disk is dead, b/c no reconstruction is implemented right now,
-	 * the function "rf_TryToRedirectPDA" always return one, which cause
-	 * go to out and return w/ good status */
-	blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = asmap->qInfo;
-
-	/* fire off the DAG */
-	bzero((char *) &tracerec, sizeof(tracerec));
-	rd_dag_h->tracerec = &tracerec;
-
-	if (rf_verifyParityDebug) {
-		printf("Parity verify read dag:\n");
-		rf_PrintDAGList(rd_dag_h);
-	}
-	RF_LOCK_MUTEX(mcpair->mutex);
-	mcpair->flag = 0;
-	rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
-	    (void *) mcpair);
-	while (!mcpair->flag)
-		RF_WAIT_COND(mcpair->cond, mcpair->mutex);
-	RF_UNLOCK_MUTEX(mcpair->mutex);
-	if (rd_dag_h->status != rf_enable) {
-		RF_ERRORMSG("Unable to verify parity: can't read the stripe\n");
-		retcode = RF_PARITY_COULD_NOT_VERIFY;
-		goto out;
-	}
-
-	/* recompute E into redundantbuf2 and P into pbuf from the data units */
-	for (p = buf, i = 0; p < end_p; p += numbytes, i++) {
-		rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector);
-		/* the corresponding columns in the EvenOdd encoding matrix
-		 * for these p pointers, which point to the data buffers of a
-		 * full stripe, run sequentially from 0 to
-		 * layoutPtr->numDataCol-1 */
-		rf_bxor(p, pbuf, numbytes, NULL);
-	}
-	RF_ASSERT(i == layoutPtr->numDataCol);
-
-	/*
-	 * Compare the recomputed P with the P read from disk.  The error
-	 * flag is raised only on an actual mismatch; the loop stops at the
-	 * first differing byte.
-	 */
-	for (i = 0; i < numbytes; i++) {
-		if (pbuf[i] != buf[bytesPerStripe + i]) {
-			if (!correct_it) {
-				RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n",
-				    i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]);
-			}
-			redundantOneErr = 1;
-			break;
-		}
-	}
-
-	/* same comparison for the second redundancy unit (E) */
-	for (i = 0; i < numbytes; i++) {
-		if (redundantbuf2[i] != buf[bytesPerStripe + numbytes + i]) {
-			if (!correct_it) {
-				RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n",
-				    i, (u_char) buf[bytesPerStripe + numbytes + i], (u_char) redundantbuf2[i]);
-			}
-			redundantTwoErr = 1;
-			break;
-		}
-	}
-	if (redundantOneErr || redundantTwoErr)
-		retcode = RF_PARITY_BAD;
-
-	/* correct the first redundant disk, ie parity if it is error */
-	if (redundantOneErr && correct_it) {
-		wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
-		    "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY);
-		wrBlock = wr_dag_h->succedents[0];
-		wrUnblock = wrBlock->succedents[0]->succedents[0];
-		wrBlock->succedents[0]->params[0].p = asmap->parityInfo;
-		wrBlock->succedents[0]->params[2].v = psID;
-		wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-		bzero((char *) &tracerec, sizeof(tracerec));
-		wr_dag_h->tracerec = &tracerec;
-		if (rf_verifyParityDebug) {
-			printf("Parity verify write dag:\n");
-			rf_PrintDAGList(wr_dag_h);
-		}
-		RF_LOCK_MUTEX(mcpair->mutex);
-		mcpair->flag = 0;
-		rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
-		    (void *) mcpair);
-		while (!mcpair->flag)
-			RF_WAIT_COND(mcpair->cond, mcpair->mutex);
-		RF_UNLOCK_MUTEX(mcpair->mutex);
-		if (wr_dag_h->status != rf_enable) {
-			RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n");
-			parity_cant_correct = RF_TRUE;
-		} else {
-			parity_corrected = RF_TRUE;
-		}
-		rf_FreeDAG(wr_dag_h);
-	}
-
-	/* correct the second redundant disk (E) if it is in error */
-	if (redundantTwoErr && correct_it) {
-		wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
-		    "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY);
-		wrBlock = wr_dag_h->succedents[0];
-		wrUnblock = wrBlock->succedents[0]->succedents[0];
-		wrBlock->succedents[0]->params[0].p = asmap->qInfo;
-		wrBlock->succedents[0]->params[2].v = psID;
-		wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-		bzero((char *) &tracerec, sizeof(tracerec));
-		wr_dag_h->tracerec = &tracerec;
-		if (rf_verifyParityDebug) {
-			printf("Dag of write new second redundant information in parity verify :\n");
-			rf_PrintDAGList(wr_dag_h);
-		}
-		RF_LOCK_MUTEX(mcpair->mutex);
-		mcpair->flag = 0;
-		rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
-		    (void *) mcpair);
-		while (!mcpair->flag)
-			RF_WAIT_COND(mcpair->cond, mcpair->mutex);
-		RF_UNLOCK_MUTEX(mcpair->mutex);
-		if (wr_dag_h->status != rf_enable) {
-			RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n");
-			red2_cant_correct = RF_TRUE;
-		} else {
-			red2_corrected = RF_TRUE;
-		}
-		rf_FreeDAG(wr_dag_h);
-	}
-
-	/*
-	 * Final status.  A failed corrective write wins.  Otherwise the
-	 * stripe counts as corrected only when every unit found in error
-	 * was rewritten; with correct_it == 0 nothing is rewritten, so the
-	 * status stays RF_PARITY_BAD.  retcode is compared here, never
-	 * assigned, so RF_PARITY_OKAY survives for a clean stripe.
-	 */
-	if ((redundantOneErr && parity_cant_correct) ||
-	    (redundantTwoErr && red2_cant_correct))
-		retcode = RF_PARITY_COULD_NOT_CORRECT;
-	else if ((retcode == RF_PARITY_BAD) &&
-	    (!redundantOneErr || parity_corrected) &&
-	    (!redundantTwoErr || red2_corrected))
-		retcode = RF_PARITY_CORRECTED;
-
-
-out:
-	rf_FreeAccessStripeMap(asm_h);
-	rf_FreeAllocList(alloclist);
-	rf_FreeDAG(rd_dag_h);
-	rf_FreeMCPair(mcpair);
-	return (retcode);
-}
-#endif /* RF_INCLUDE_EVENODD > 0 */
diff --git a/sys/dev/raidframe/rf_evenodd.h b/sys/dev/raidframe/rf_evenodd.h
deleted file mode 100644
index 4babdec..0000000
--- a/sys/dev/raidframe/rf_evenodd.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_evenodd.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */
-/*
- * Copyright (c) 1995, 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Chang-Ming Wu
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_EVENODD_H_
-#define _RF__RF_EVENODD_H_
-
-/* extern declarations of the EVENODD layout functions. */
-
-/* layout configuration */
-int	rf_ConfigureEvenOdd(RF_ShutdownList_t **shutdownListp,
-	    RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
-
-/* reconstruction defaults */
-int	rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr);
-RF_HeadSepLimit_t
-	rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr);
-
-/* address mapping */
-void	rf_IdentifyStripeEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
-	    RF_RowCol_t **diskids, RF_RowCol_t *outrow);
-void	rf_MapParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
-	    RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector,
-	    int remap);
-void	rf_MapEEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
-	    RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector,
-	    int remap);
-
-/* DAG selection and parity verification */
-void	rf_EODagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
-	    RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc);
-int	rf_VerifyParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
-	    RF_PhysDiskAddr_t *parityPDA, int correct_it,
-	    RF_RaidAccessFlags_t flags);
-
-#endif /* !_RF__RF_EVENODD_H_ */
diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.c b/sys/dev/raidframe/rf_evenodd_dagfuncs.c
deleted file mode 100644
index 2e39a53..0000000
--- a/sys/dev/raidframe/rf_evenodd_dagfuncs.c
+++ /dev/null
@@ -1,977 +0,0 @@
-/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.7 2001/01/26 03:50:53 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: ChangMing Wu
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Code for RAID-EVENODD architecture.
- */
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_EVENODD > 0
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_evenodd.h>
-#include <dev/raidframe/rf_evenodd_dagfuncs.h>
-
-/*
- * Redundancy-function tables.  Judging by the P table, each initializer is
- * { regular function, its display name, simple function, its display name }.
- */
-/* Redundancy functions for small writes (P = parity, E = second redundancy). */
-RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
-/* The simple slot used to be mislabelled "Regular Old-New E". */
-RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Simple Old-New E"};
-/* Redundancy functions for degraded reads. */
-RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
-RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
-/**********************************************************************************************
- * Encoding node function used in EO_000_CreateLargeWriteDAG.
- *
- * Encodes the node's data buffers into the buffer at node->results[1] via
- * rf_RegularESubroutine(), then calls rf_RegularXorFunc(), which performs the
- * wakeup.  Always returns 0.
- **********************************************************************************************/
-int
-rf_RegularPEFunc(RF_DagNode_t *node)
-{
-	char   *e_buffer = node->results[1];
-
-	rf_RegularESubroutine(node, e_buffer);
-	rf_RegularXorFunc(node);	/* the wakeup is done in here */
-	return (0);
-}
-
-
-/************************************************************************************************
- * EO_001_CreateSmallWriteDAG uses two functions: (i) rf_RegularONEFunc() and
- * (ii) rf_SimpleONEFunc(). The former is used when the write covers at least a full
- * stripe unit's worth of sectors. The latter is used when the write touches two stripe
- * units but the total number of sectors is less than the sectors per SU. In that case
- * the accessed regions of parity and 'E' are disconnected areas within their stripe
- * units, so the parity write and the 'E' write are each divided into two distinct
- * writes (four in total). The simple and regular old-new writes happen as in RAID-5.
- ************************************************************************************************/
-
-/*
- * Compute the new 'E' for a regular small write (old-new update).
- *
- * Algorithm:
- *   1. XOR each Wnd (new data) buffer into the matching Rod (old data)
- *      buffer, so the Rod buffer holds the difference of old and new data.
- *   2. Encode that difference into the buffer which already holds the old
- *      'E'; the result can be shown to be the new 'E'.
- *   3. XOR the Wnd buffer into the difference buffer again to recover the
- *      original old data, which the parity XOR node still needs.
- * The alternative, a temporary buffer for the difference, takes the same
- * time and needs more memory.
- *
- * Parameter layout as read here: params[0 .. EpdaIndex-1] are (pda, buffer)
- * pairs, params[EpdaIndex] is the E pda, the buffer XORed against pair k is
- * params[k + EpdaIndex + 3], and the last parameter is the RF_Raid_t pointer.
- * results[0] is the E buffer.
- *
- * A non-zero status from rf_bxor() is forwarded to rf_GenericWakeupFunc(),
- * as rf_SimpleONEFunc() does; it used to be stored and then dropped.
- * Always returns 0.
- */
-int
-rf_RegularONEFunc(RF_DagNode_t *node)
-{
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
-	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* index in params[] of
-								 * the E pda */
-	int     i, k, retcode = 0;
-	int     suoffset, length;
-	RF_RowCol_t scol;
-	char   *srcbuf, *destbuf;
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-	RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
-	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);	/* generally zero */
-
-	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
-	RF_ASSERT(ESUOffset == 0);
-
-	RF_ETIMER_START(timer);
-
-	/* Step 1: XOR the Wnd buffer into the Rod buffer; the Rod buffer then
-	 * holds the difference of old data and new data. */
-	for (k = 0; k < EpdaIndex; k += 2) {
-		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
-		retcode |= rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
-	}
-	/* Step 2: encode the difference buffers into the 'E' buffer. */
-	for (i = 0; i < EpdaIndex; i += 2)
-		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is the
-									 * buffer of E */
-			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
-			srcbuf = (char *) node->params[i + 1].p;
-			scol = rf_EUCol(layoutPtr, pda->raidAddress);
-			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
-			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
-		}
-	/* Step 3: repeat the XOR of step 1 to restore the original old data
-	 * for the parity encoding function in the Xor node. */
-	for (k = 0; k < EpdaIndex; k += 2) {
-		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
-		retcode |= rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
-	}
-	RF_ETIMER_STOP(timer);
-	RF_ETIMER_EVAL(timer);
-	tracerec->q_us += RF_ETIMER_VAL_US(timer);
-	/* no I/O in this node, so the wakeup is issued explicitly */
-	rf_GenericWakeupFunc(node, retcode);
-	return (0);
-}
-
-/*
- * Compute the new 'E' for a simple small write (old-new update).
- *
- * Parameters as read here: params[0] is a data pda, params[1] the buffer that
- * receives the old/new difference, params[2] the E pda (asserted to be of type
- * RF_PDA_TYPE_Q), params[3] the E buffer, params[4] a write-data pda and
- * params[5] its buffer; the last parameter is the RF_Raid_t pointer.
- *
- * When the DAG is not enabled nothing is computed.  Either way the node is
- * woken explicitly, and the value of rf_GenericWakeupFunc() is returned.
- */
-int
-rf_SimpleONEFunc(RF_DagNode_t *node)
-{
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
-	RF_PhysDiskAddr_t *dataPda = (RF_PhysDiskAddr_t *) node->params[0].p;
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-	RF_RowCol_t srcCol;
-	char   *diffBuf, *eBuf;
-	int     nbytes;
-	int     status = 0;
-
-	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
-
-	/* Disabled DAG: skip the computation and just wake the node. */
-	if (node->dagHdr->status != rf_enable)
-		return (rf_GenericWakeupFunc(node, status));
-
-	RF_ETIMER_START(timer);
-
-	/* byte count comes from the pda of the write-data node */
-	nbytes = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);
-
-	/* XOR the new data into the read-data buffer: it now holds old ^ new */
-	status = rf_bxor(node->params[5].p, node->params[1].p, nbytes, node->dagHdr->bp);
-
-	/* column of the encoding matrix that corresponds to the written data */
-	srcCol = rf_EUCol(layoutPtr, dataPda->raidAddress);
-	diffBuf = node->params[1].p;
-	eBuf = node->params[3].p;
-
-	/* fold the difference into E, then undo the XOR on the data buffer */
-	rf_e_encToBuf(raidPtr, srcCol, diffBuf, RF_EO_MATRIX_DIM - 2, eBuf, dataPda->numSector);
-	rf_bxor(node->params[5].p, node->params[1].p, nbytes, node->dagHdr->bp);
-
-	RF_ETIMER_STOP(timer);
-	RF_ETIMER_EVAL(timer);
-	tracerec->q_us += RF_ETIMER_VAL_US(timer);
-
-	/* no I/O in this node, so the wakeup is issued explicitly */
-	return (rf_GenericWakeupFunc(node, status));
-}
-
-
-/*
- * Called by rf_RegularPEFunc() and rf_RegularEFunc() for the fault-free
- * large write.
- *
- * Walks the (pda, buffer) pairs in params[0 .. numParams-3] and encodes each
- * buffer into 'ebuf' at the byte offset given by the pda's stripe-unit
- * offset.  The last parameter is the RF_Raid_t pointer.  The elapsed time is
- * added to the trace record's xor_us counter.
- */
-void
-rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
-{
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-	int     prm;
-
-	RF_ETIMER_START(timer);
-	for (prm = 0; prm < node->numParams - 2; prm += 2) {
-		RF_PhysDiskAddr_t *pda;
-		RF_RowCol_t srcCol;
-		char   *src, *dst;
-		int     suOff;
-
-		/* a source buffer must never alias the E buffer */
-		RF_ASSERT(node->params[prm + 1].p != ebuf);
-
-		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
-		suOff = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-		srcCol = rf_EUCol(layoutPtr, pda->raidAddress);
-		src = (char *) node->params[prm + 1].p;
-		dst = ebuf + rf_RaidAddressToByte(raidPtr, suOff);
-		rf_e_encToBuf(raidPtr, srcCol, src, RF_EO_MATRIX_DIM - 2, dst, pda->numSector);
-	}
-	RF_ETIMER_STOP(timer);
-	RF_ETIMER_EVAL(timer);
-	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
-}
-
-
-/*******************************************************************************************
- * Used in EO_001_CreateLargeWriteDAG.
- *
- * Encodes the node's data buffers into the buffer at node->results[0] and then
- * wakes the node with status 0.  Always returns 0.
- ******************************************************************************************/
-int
-rf_RegularEFunc(RF_DagNode_t *node)
-{
-	char   *e_buffer = node->results[0];
-
-	rf_RegularESubroutine(node, e_buffer);
-	rf_GenericWakeupFunc(node, 0);
-	return (0);
-}
-/*******************************************************************************************
- * E encoding for degraded writes.  Only two cases are allowed:
- *   1. The write covers the whole failed stripe unit; the access may then span
- *      more than one stripe unit.
- *   2. The write covers only part of the failed SU; accesses of more than one
- *      stripe unit are assumed not to happen, so the write can be handled like
- *      a large write.
- * Apart from the second case this looks the same as the large-write encoding
- * function.  It is not exactly the normal way of doing a degraded write, because
- * RAIDframe has to break any other access into smaller ones; rf_DegrESubroutine
- * may have to change in the future.
- *
- * params[numParams-2] is the failed pda and params[numParams-1] the RF_Raid_t
- * pointer.  Each preceding (pda, buffer) pair is encoded into 'ebuf' at an offset
- * relative to the failed stripe-unit offset.  The elapsed time is added to the
- * trace record's q_us counter.
- *******************************************************************************************/
-void
-rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
-{
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
-	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
-	int     failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-	int     prm;
-
-	RF_ETIMER_START(timer);
-	for (prm = 0; prm < node->numParams - 2; prm += 2) {
-		RF_PhysDiskAddr_t *pda;
-		RF_RowCol_t srcCol;
-		char   *src, *dst;
-		int     suOff;
-
-		/* a source buffer must never alias the E buffer */
-		RF_ASSERT(node->params[prm + 1].p != ebuf);
-
-		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
-		suOff = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-		srcCol = rf_EUCol(layoutPtr, pda->raidAddress);
-		src = (char *) node->params[prm + 1].p;
-		/* ebuf starts at the failed access, hence the relative offset */
-		dst = ebuf + rf_RaidAddressToByte(raidPtr, suOff - failedSUOffset);
-		rf_e_encToBuf(raidPtr, srcCol, src, RF_EO_MATRIX_DIM - 2, dst, pda->numSector);
-	}
-
-	RF_ETIMER_STOP(timer);
-	RF_ETIMER_EVAL(timer);
-	tracerec->q_us += RF_ETIMER_VAL_US(timer);
-}
-
-
-/**************************************************************************************
- * Used when one data disk has failed and both redundant disks are alive
- * (EO_100_CreateWriteDAG).  If another disk in the stripe has failed but is not
- * accessed at this time, rf_EOWriteDoubleRecoveryFunc() should be used instead.
- *
- * Encodes into the buffer at node->results[1] via rf_DegrESubroutine(), then calls
- * rf_RecoveryXorFunc(), which performs the wakeup.  Always returns 0.
- **************************************************************************************/
-int
-rf_Degraded_100_EOFunc(RF_DagNode_t *node)
-{
-	char   *e_buffer = node->results[1];
-
-	rf_DegrESubroutine(node, e_buffer);
-	rf_RecoveryXorFunc(node);	/* the wakeup is done in here */
-	return (0);
-}
-/**************************************************************************************
- * This function is to encode one sector in one of the data disks to the E disk.
- * However, in evenodd this function can also be used as decoding function to recover
- * data from dead disk in the case of parity failure and a single data failure.
- *
- * The sector is handled as (RF_EO_MATRIX_DIM - 1) encoding units (EUs) of
- * bytesPerSector / (RF_EO_MATRIX_DIM - 1) bytes each.  For every EU row of the
- * destination sector, the source EU at row S_index is XORed in, and so is the
- * source EU at row rf_EO_Mod(row + destLogicCol - srcLogicCol, RF_EO_MATRIX_DIM),
- * unless that row index is RF_EO_MATRIX_DIM - 1.
- *
- * srcLogicCol, srcSecbuf:   logical column and buffer of the source sector
- * destLogicCol, destSecbuf: logical column and buffer of the destination
- *                           sector; the buffer is updated in place
- * bytesPerSector:           size of one sector in bytes
- *
- * The XOR is done in units of short when RF_EO_MATRIX_DIM > 17 and in units of
- * long when RF_EO_MATRIX_DIM == 17; no XOR code is compiled for smaller values.
- **************************************************************************************/
-void
-rf_e_EncOneSect(
- RF_RowCol_t srcLogicCol,
- char *srcSecbuf,
- RF_RowCol_t destLogicCol,
- char *destSecbuf,
- int bytesPerSector)
-{
- int S_index; /* index of the EU in the src col which needs
- * to be XORed into all EUs in a dest sector */
- int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
- RF_RowCol_t j, indexInDest, /* row index of an encoding unit in
- * the destination column of the encoding
- * matrix */
- indexInSrc; /* row index of an encoding unit in the source
- * column used for recovery */
- int bytesPerEU = bytesPerSector / numRowInEncMatix;
-
-#if RF_EO_MATRIX_DIM > 17
- int shortsPerEU = bytesPerEU / sizeof(short);
- short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
- short temp1;
-#elif RF_EO_MATRIX_DIM == 17
- int longsPerEU = bytesPerEU / sizeof(long);
- long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
- long temp1;
-#endif
-
-#if RF_EO_MATRIX_DIM > 17
- RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
- RF_ASSERT(bytesPerEU % sizeof(short) == 0); /* an EU must be a whole number of shorts */
-#elif RF_EO_MATRIX_DIM == 17
- RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
- RF_ASSERT(bytesPerEU % sizeof(long) == 0); /* an EU must be a whole number of longs */
-#endif
-
- S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
-#if RF_EO_MATRIX_DIM > 17
- srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU); /* source EU at row S_index */
-#elif RF_EO_MATRIX_DIM == 17
- srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU); /* source EU at row S_index */
-#endif
-
- for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
- indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
-
-#if RF_EO_MATRIX_DIM > 17
- destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
- srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
- for (j = 0; j < shortsPerEU; j++) {
- temp1 = destShortBuf[j] ^ srcShortBuf1[j];
- /* note: S_index won't be at the end row for any src
- * col! */
- if (indexInSrc != RF_EO_MATRIX_DIM - 1)
- destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
- /* if indexInSrc is at the end row, ie.
- * RF_EO_MATRIX_DIM -1, then all elements are zero! */
- else
- destShortBuf[j] = temp1;
- }
-
-#elif RF_EO_MATRIX_DIM == 17
- destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
- srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
- for (j = 0; j < longsPerEU; j++) {
- temp1 = destLongBuf[j] ^ srcLongBuf1[j];
- if (indexInSrc != RF_EO_MATRIX_DIM - 1) /* same rule as the short variant above */
- destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
- else
- destLongBuf[j] = temp1;
- }
-#endif
- }
-}
-
-/*
- * Apply rf_e_EncOneSect() to 'numSector' consecutive sectors, advancing both
- * 'srcbuf' and 'destbuf' by one sector (rf_RaidAddressToByte(raidPtr, 1) bytes)
- * after each call.  Does nothing when numSector is not positive.
- */
-void
-rf_e_encToBuf(
-	RF_Raid_t * raidPtr,
-	RF_RowCol_t srcLogicCol,
-	char *srcbuf,
-	RF_RowCol_t destLogicCol,
-	char *destbuf,
-	int numSector)
-{
-	int     sectorBytes = rf_RaidAddressToByte(raidPtr, 1);
-	int     remaining;
-
-	for (remaining = numSector; remaining > 0; remaining--) {
-		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, sectorBytes);
-		srcbuf += sectorBytes;
-		destbuf += sectorBytes;
-	}
-}
-/**************************************************************************************
- * When parity and one data unit have died, the second redundant information, 'E',
- * is used to recover the data of the dead disk.  This is the function of the
- * recovery node for EO_110_CreateReadDAG.
- *
- * params[numParams-2] is the failed pda and params[numParams-1] the RF_Raid_t
- * pointer; the preceding parameters are (pda, buffer) pairs, the pair at index
- * numParams-4 being treated as the E column.  results[0] is the buffer that
- * receives the recovered data; it is zeroed first, even when the DAG is not
- * enabled.  Returns the value of rf_GenericWakeupFunc(node, 0).
- **************************************************************************************/
-int
-rf_RecoveryEFunc(RF_DagNode_t *node)
-{
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
-	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
-	RF_RowCol_t fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
-									 * the failed SU */
-	int     failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-	RF_Etimer_t timer;
-	int     prm;
-
-	bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
-
-	/* Disabled DAG: leave the zeroed buffer and just wake the node. */
-	if (node->dagHdr->status != rf_enable)
-		return (rf_GenericWakeupFunc(node, 0));
-
-	RF_ETIMER_START(timer);
-	for (prm = 0; prm < node->numParams - 2; prm += 2) {
-		RF_PhysDiskAddr_t *pda;
-		RF_RowCol_t scol;	/* source logical column */
-		char   *srcbuf, *destbuf;
-		int     suoffset;
-
-		/* skip a pair whose buffer is the result buffer itself */
-		if (node->params[prm + 1].p == node->results[0])
-			continue;
-
-		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
-		/* the last pair before the failed pda is the redundant E column */
-		scol = (prm == node->numParams - 4)
-		    ? RF_EO_MATRIX_DIM - 2
-		    : rf_EUCol(layoutPtr, pda->raidAddress);
-		srcbuf = (char *) node->params[prm + 1].p;
-		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-		destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
-		rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
-	}
-	RF_ETIMER_STOP(timer);
-	RF_ETIMER_EVAL(timer);
-	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
-
-	return (rf_GenericWakeupFunc(node, 0));	/* node executed successfully */
-}
-/**************************************************************************************
- * Used in the case where one data unit and the parity have failed
- * (EO_110_CreateWriteDAG).
- *
- * Encodes into the buffer at node->results[0] via rf_DegrESubroutine() and then
- * wakes the node with status 0.  Always returns 0.
- **************************************************************************************/
-int
-rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
-{
-	char   *e_buffer = node->results[0];
-
-	rf_DegrESubroutine(node, e_buffer);
-	rf_GenericWakeupFunc(node, 0);
-	return (0);
-}
-
-
-
-/**************************************************************************************
- * Double-failure decoder, used by the double degraded read and write cases.
- *
- * Recovers one sector of each of two failed data columns.
- *
- * raidPtr: array descriptor (sector size and number of data columns)
- * rrdbuf:  rrdbuf[j] is the sector buffer of data column j; the entries for the
- *          two failed columns are not read
- * dest:    dest[0] / dest[1] receive the recovered sector of column fcol[0] /
- *          fcol[1]; both are asserted to start out zeroed
- * fcol:    the two failed logical columns; they must differ
- * pbuf:    sector buffer of the parity column
- * ebuf:    sector buffer of the 'E' column
- *
- * Two scratch buffers of one encoding unit each are allocated with RF_Malloc()
- * and freed before returning.  The work is done in units of short when
- * RF_EO_MATRIX_DIM > 17 and in units of long when RF_EO_MATRIX_DIM == 17.
- **************************************************************************************/
-
-void
-rf_doubleEOdecode(
-	RF_Raid_t * raidPtr,
-	char **rrdbuf,
-	char **dest,
-	RF_RowCol_t * fcol,
-	char *pbuf,
-	char *ebuf)
-{
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
-	int     i, j, k, f1, f2, row;
-	int     rrdrow, erow, count = 0;
-	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
-	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
-#if 0
-	int     pcol = (RF_EO_MATRIX_DIM) - 1;
-#endif
-	int     ecol = (RF_EO_MATRIX_DIM) - 2;
-	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
-	int     numDataCol = layoutPtr->numDataCol;
-#if RF_EO_MATRIX_DIM > 17
-	int     shortsPerEU = bytesPerEU / sizeof(short);
-	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
-	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
-	short  *temp;
-	short  *P;
-
-	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
-	RF_Malloc(P, bytesPerEU, (short *));
-	RF_Malloc(temp, bytesPerEU, (short *));
-#elif RF_EO_MATRIX_DIM == 17
-	int     longsPerEU = bytesPerEU / sizeof(long);
-	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
-	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
-	long   *temp;
-	long   *P;
-
-	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
-	RF_Malloc(P, bytesPerEU, (long *));
-	RF_Malloc(temp, bytesPerEU, (long *));
-#endif
-	RF_ASSERT(*((long *) dest[0]) == 0);
-	RF_ASSERT(*((long *) dest[1]) == 0);
-	bzero((char *) P, bytesPerEU);
-	bzero((char *) temp, bytesPerEU);
-	RF_ASSERT(*P == 0);
-	/* calculate the 'P' parameter, which, not parity, is the Xor of all
-	 * elements in the last two columns, ie. the 'E' and 'parity' columns,
-	 * see the Ref. paper by Blaum, et al 1993 */
-	for (i = 0; i < numRowInEncMatix; i++)
-		/* The loop bound must use the unit count of the active
-		 * configuration: longsPerEU is not declared when
-		 * RF_EO_MATRIX_DIM > 17. */
-#if RF_EO_MATRIX_DIM > 17
-		for (k = 0; k < shortsPerEU; k++) {
-			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
-			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
-#elif RF_EO_MATRIX_DIM == 17
-		for (k = 0; k < longsPerEU; k++) {
-			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
-			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
-#endif
-			P[k] ^= *ebuf_current;
-			P[k] ^= *pbuf_current;
-		}
-	RF_ASSERT(fcol[0] != fcol[1]);
-	/* order the failed columns so that f1 < f2; dest_smaller and
-	 * dest_larger follow the same ordering */
-	if (fcol[0] < fcol[1]) {
-#if RF_EO_MATRIX_DIM > 17
-		dest_smaller = (short *) (dest[0]);
-		dest_larger = (short *) (dest[1]);
-#elif RF_EO_MATRIX_DIM == 17
-		dest_smaller = (long *) (dest[0]);
-		dest_larger = (long *) (dest[1]);
-#endif
-		f1 = fcol[0];
-		f2 = fcol[1];
-	} else {
-#if RF_EO_MATRIX_DIM > 17
-		dest_smaller = (short *) (dest[1]);
-		dest_larger = (short *) (dest[0]);
-#elif RF_EO_MATRIX_DIM == 17
-		dest_smaller = (long *) (dest[1]);
-		dest_larger = (long *) (dest[0]);
-#endif
-		f1 = fcol[1];
-		f2 = fcol[0];
-	}
-	/* walk the rows in steps of (f1 - f2) modulo RF_EO_MATRIX_DIM until
-	 * the walk returns to the last (imaginary) row */
-	row = (RF_EO_MATRIX_DIM) - 1;
-	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
-#if RF_EO_MATRIX_DIM > 17
-		dest_larger_current = dest_larger + row * shortsPerEU;
-		dest_smaller_current = dest_smaller + row * shortsPerEU;
-#elif RF_EO_MATRIX_DIM == 17
-		dest_larger_current = dest_larger + row * longsPerEU;
-		dest_smaller_current = dest_smaller + row * longsPerEU;
-#endif
-		/** Do the diagonal recovery. Initially, temp[k] = (failed 1),
-		    which is the failed data in the column which has the smaller col index. **/
-		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
-		for (j = 0; j < numDataCol; j++) {
-			if (j == f1 || j == f2)
-				continue;
-			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
-			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
-#if RF_EO_MATRIX_DIM > 17
-				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
-				for (k = 0; k < shortsPerEU; k++)
-					temp[k] ^= *(rrdbuf_current + k);
-#elif RF_EO_MATRIX_DIM == 17
-				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
-				for (k = 0; k < longsPerEU; k++)
-					temp[k] ^= *(rrdbuf_current + k);
-#endif
-			}
-		}
-		/* step 2:  ^E(erow,m-2), If erow is at the bottom row, don't
-		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
-		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
-		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
-		 * diagonal) ^ (failed 2)       */
-
-		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
-		if (erow != (RF_EO_MATRIX_DIM) - 1) {
-#if RF_EO_MATRIX_DIM > 17
-			ebuf_current = (short *) ebuf + shortsPerEU * erow;
-			for (k = 0; k < shortsPerEU; k++)
-				temp[k] ^= *(ebuf_current + k);
-#elif RF_EO_MATRIX_DIM == 17
-			ebuf_current = (long *) ebuf + longsPerEU * erow;
-			for (k = 0; k < longsPerEU; k++)
-				temp[k] ^= *(ebuf_current + k);
-#endif
-		}
-		/* step 3: ^P to obtain the failed data (failed 2).  P can be
-		 * proved to be actually  (principle diagonal)  After this
-		 * step, temp[k] = (failed 2), the failed data to be recovered */
-#if RF_EO_MATRIX_DIM > 17
-		for (k = 0; k < shortsPerEU; k++)
-			temp[k] ^= P[k];
-		/* Put the data to the destination buffer */
-		for (k = 0; k < shortsPerEU; k++)
-			dest_larger_current[k] = temp[k];
-#elif RF_EO_MATRIX_DIM == 17
-		for (k = 0; k < longsPerEU; k++)
-			temp[k] ^= P[k];
-		/* Put the data to the destination buffer */
-		for (k = 0; k < longsPerEU; k++)
-			dest_larger_current[k] = temp[k];
-#endif
-
-		/** THE FOLLOWING DO THE HORIZONTAL XOR **/
-		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
-		 * columns */
-		for (j = 0; j < numDataCol; j++) {
-			if (j == f1 || j == f2)
-				continue;
-#if RF_EO_MATRIX_DIM > 17
-			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
-			for (k = 0; k < shortsPerEU; k++)
-				temp[k] ^= *(rrdbuf_current + k);
-#elif RF_EO_MATRIX_DIM == 17
-			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
-			for (k = 0; k < longsPerEU; k++)
-				temp[k] ^= *(rrdbuf_current + k);
-#endif
-		}
-		/* step 2: ^A(row,m-1) */
-		/* step 3: Put the data to the destination buffer                             	 */
-#if RF_EO_MATRIX_DIM > 17
-		pbuf_current = (short *) pbuf + shortsPerEU * row;
-		for (k = 0; k < shortsPerEU; k++)
-			temp[k] ^= *(pbuf_current + k);
-		for (k = 0; k < shortsPerEU; k++)
-			dest_smaller_current[k] = temp[k];
-#elif RF_EO_MATRIX_DIM == 17
-		pbuf_current = (long *) pbuf + longsPerEU * row;
-		for (k = 0; k < longsPerEU; k++)
-			temp[k] ^= *(pbuf_current + k);
-		for (k = 0; k < longsPerEU; k++)
-			dest_smaller_current[k] = temp[k];
-#endif
-		count++;
-	}
-	/* Check if all Encoding Units in the data buffer have been decoded;
-	 * according to EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime
-	 * number, this algorithm will cover the whole buffer */
-	RF_ASSERT(count == numRowInEncMatix);
-	RF_Free((char *) P, bytesPerEU);
-	RF_Free((char *) temp, bytesPerEU);
-}
-
-
-/***************************************************************************************
-* 	This function is called by double degraded read
-* 	EO_200_CreateReadDAG
-*
-*	Parameter layout as read here: the leading params are data pdas (everything
-*	before the first pda whose type is not RF_PDA_TYPE_DATA), params[np-4] is the
-*	parity pda, params[np-3] the E pda, params[np-2] the RF_Raid_t pointer and
-*	params[np-1] the access stripe map.  node->results[] holds one or two pdas
-*	for the failed stripe units being read; their buffers are zeroed and then
-*	filled sector by sector with rf_doubleEOdecode().
-*
-*	Wakes the node with status 0 and always returns 0.
-***************************************************************************************/
-int
-rf_EvenOddDoubleRecoveryFunc(node)
-	RF_DagNode_t *node;
-{
-	int     ndataParam = 0;
-	int     np = node->numParams;
-	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
-	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
-	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
-	int     i, prm, sector, nresults = node->numResults;
-	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
-	unsigned sosAddr;
-	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
-							 * memory is allocated */
-	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
-	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
-	        npda;
-	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
-	char  **buf, *ebuf, *pbuf, *dest[2];
-	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
-	RF_SectorNum_t startSector, endSector;
-	RF_Etimer_t timer;
-	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-
-	RF_ETIMER_START(timer);
-
-	/* Find out the number of parameters which are pdas for data
-	 * information.  The scan is expected to stop at the parity pda; the
-	 * bound is i < np so that params[np], which is past the end of the
-	 * parameter array, is never read. */
-	for (i = 0; i < np; i++)
-		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
-			ndataParam = i;
-			break;
-		}
-	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
-	if (ndataParam != 0) {
-		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
-		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
-		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
-	}
-	if (asmap->failedPDAs[1] &&
-	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
-		RF_ASSERT(0);	/* currently, no support for this situation */
-		ppda = node->params[np - 6].p;
-		ppda2 = node->params[np - 5].p;
-		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
-		epda = node->params[np - 4].p;
-		epda2 = node->params[np - 3].p;
-		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
-		two = 1;
-	} else {
-		ppda = node->params[np - 4].p;
-		epda = node->params[np - 3].p;
-		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
-		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
-		RF_ASSERT(psuoff == esuoff);
-	}
-	/*
-            the following has three goals:
-            1. determine the startSector to begin decoding and endSector to end decoding.
-            2. determine the column numbers of the two failed disks.
-            3. determine the offset and end offset of the access within each failed stripe unit.
-         */
-	if (nresults == 1) {
-		/* find the startSector to begin decoding */
-		pda = node->results[0];
-		bzero(pda->bufPtr, bytesPerSector * pda->numSector);
-		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-		fsuend[0] = fsuoff[0] + pda->numSector;
-		startSector = fsuoff[0];
-		endSector = fsuend[0];
-
-		/* find out the column of failed disk being accessed */
-		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
-
-		/* find out the other failed column not accessed */
-		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
-		for (i = 0; i < numDataCol; i++) {
-			npda.raidAddress = sosAddr + (i * secPerSU);
-			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
-			/* stop at the first dead disk other than fcol[0] */
-			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
-				if (i != fcol[0])
-					break;
-		}
-		RF_ASSERT(i < numDataCol);
-		fcol[1] = i;
-	} else {
-		RF_ASSERT(nresults == 2);
-		pda0 = node->results[0];
-		bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
-		pda1 = node->results[1];
-		bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);
-		/* determine the failed column numbers of the two failed
-		 * disks. */
-		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
-		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
-		/* determine the offset and end offset of the access within
-		 * each failed stripe unit. */
-		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
-		fsuend[0] = fsuoff[0] + pda0->numSector;
-		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
-		fsuend[1] = fsuoff[1] + pda1->numSector;
-		/* determine the startSector to begin decoding.  Like
-		 * endSector, and like startSector in the one-result case, it
-		 * is an offset within the stripe unit: 'sector' below is
-		 * compared against fsuoff[]/fsuend[] and has psuoff/esuoff
-		 * subtracted from it, so the absolute pda->startSector values
-		 * must not be used here. */
-		startSector = RF_MIN(fsuoff[0], fsuoff[1]);
-		/* determine the endSector to end decoding */
-		endSector = RF_MAX(fsuend[0], fsuend[1]);
-	}
-	/*
-	      assign the beginning sector and the end sector for each parameter
-	      find out the corresponding column # for each parameter
-        */
-	for (prm = 0; prm < ndataParam; prm++) {
-		pda = node->params[prm].p;
-		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-		suend[prm] = suoff[prm] + pda->numSector;
-		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
-	}
-	/* 'sector' is the sector for the current decoding algorithm. For each
-	 * sector in the failed SU, find out the corresponding parameters that
-	 * cover the current sector and that are needed for decoding of this
-	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
-	 * accessed failed SU. If not, malloc a temporary space of a sector in
-	 * size. */
-	for (sector = startSector; sector < endSector; sector++) {
-		if (nresults == 2)
-			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
-				continue;
-		for (prm = 0; prm < ndataParam; prm++)
-			if (suoff[prm] <= sector && sector < suend[prm])
-				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
-				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
-		/* find out if sector is in the shadow of any accessed failed
-		 * SU. If yes, assign dest[0], dest[1] to point at suitable
-		 * position of the buffer corresponding to failed SUs. if no,
-		 * malloc a temporary space of a sector in size for
-		 * destination of decoding. */
-		RF_ASSERT(nresults == 1 || nresults == 2);
-		if (nresults == 1) {
-			dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
-			/* Always malloc temp buffer to dest[1]  */
-			RF_Malloc(dest[1], bytesPerSector, (char *));
-			bzero(dest[1], bytesPerSector);
-			mallc_two = 1;
-		} else {
-			if (fsuoff[0] <= sector && sector < fsuend[0])
-				dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
-			else {
-				RF_Malloc(dest[0], bytesPerSector, (char *));
-				bzero(dest[0], bytesPerSector);
-				mallc_one = 1;
-			}
-			if (fsuoff[1] <= sector && sector < fsuend[1])
-				dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
-			else {
-				RF_Malloc(dest[1], bytesPerSector, (char *));
-				bzero(dest[1], bytesPerSector);
-				mallc_two = 1;
-			}
-			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
-		}
-		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
-		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
-		/*
-	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
-	         * one sector to destination.
-	         */
-		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
-		/* free all allocated memory, and mark flag to indicate no
-		 * memory is being allocated */
-		if (mallc_one == 1)
-			RF_Free(dest[0], bytesPerSector);
-		if (mallc_two == 1)
-			RF_Free(dest[1], bytesPerSector);
-		mallc_one = mallc_two = 0;
-	}
-	RF_Free(buf, numDataCol * sizeof(char *));
-	if (ndataParam != 0) {
-		RF_Free(suoff, ndataParam * sizeof(long));
-		RF_Free(suend, ndataParam * sizeof(long));
-		RF_Free(prmToCol, ndataParam * sizeof(long));
-	}
-	RF_ETIMER_STOP(timer);
-	RF_ETIMER_EVAL(timer);
-	if (tracerec) {
-		tracerec->q_us += RF_ETIMER_VAL_US(timer);
-	}
-	rf_GenericWakeupFunc(node, 0);
-#if 1
-	return (0);		/* XXX is this even close!!?!?!!? GO */
-#endif
-}
-
-
-/* currently, only access of one of the two failed SU is allowed in this function.
- * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
- * many accesses of single stripe unit.
- */
-
-int
-rf_EOWriteDoubleRecoveryFunc(node)
- RF_DagNode_t *node;
-{
- int np = node->numParams;
- RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
- RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
- RF_SectorNum_t sector;
- RF_RowCol_t col, scol;
- int prm, i, j;
- RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
- unsigned sosAddr;
- unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
- RF_int64 numbytes;
- RF_SectorNum_t startSector, endSector;
- RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
- RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
- char **buf; /* buf[0], buf[1], buf[2], ...etc. point to
- * buffer storing data read from col0, col1,
- * col2 */
- char *ebuf, *pbuf, *dest[2], *olddata[2];
- RF_Etimer_t timer;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-
- RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this
- * case, the other failed SU
- * is not being accessed */
- RF_ETIMER_START(timer);
- RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
-
- ppda = node->results[0];/* Instead of being buffers, node->results[0]
- * and [1] are Ppda and Epda */
- epda = node->results[1];
- fpda = asmap->failedPDAs[0];
-
- /* First, recovery the failed old SU using EvenOdd double decoding */
- /* determine the startSector and endSector for decoding */
- startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
- endSector = startSector + fpda->numSector;
- /* Assign buf[col] pointers to point to each non-failed colume and
- * initialize the pbuf and ebuf to point at the beginning of each
- * source buffers and destination buffers */
- for (prm = 0; prm < numDataCol - 2; prm++) {
- pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
- col = rf_EUCol(layoutPtr, pda->raidAddress);
- buf[col] = pda->bufPtr;
- }
- /* pbuf and ebuf: they will change values as double recovery decoding
- * goes on */
- pbuf = ppda->bufPtr;
- ebuf = epda->bufPtr;
- /* find out the logical colume numbers in the encoding matrix of the
- * two failed columes */
- fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
-
- /* find out the other failed colume not accessed this time */
- sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
- for (i = 0; i < numDataCol; i++) {
- npda.raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
- /* skip over dead disks */
- if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
- if (i != fcol[0])
- break;
- }
- RF_ASSERT(i < numDataCol);
- fcol[1] = i;
- /* assign temporary space to put recovered failed SU */
- numbytes = fpda->numSector * bytesPerSector;
- RF_Malloc(olddata[0], numbytes, (char *));
- RF_Malloc(olddata[1], numbytes, (char *));
- dest[0] = olddata[0];
- dest[1] = olddata[1];
- bzero(olddata[0], numbytes);
- bzero(olddata[1], numbytes);
- /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j]
- * have already pointed at the beginning of each source buffers and
- * destination buffers */
- for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
- rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
- for (j = 0; j < numDataCol; j++)
- if ((j != fcol[0]) && (j != fcol[1]))
- buf[j] += bytesPerSector;
- dest[0] += bytesPerSector;
- dest[1] += bytesPerSector;
- ebuf += bytesPerSector;
- pbuf += bytesPerSector;
- }
- /* after recovery, the buffer pointed by olddata[0] is the old failed
- * data. With new writing data and this old data, use small write to
- * calculate the new redundant informations */
- /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
- * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
- * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
- * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
- * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
- * wudNodes; For current implementation, we assume the simplest case:
- * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
- * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
- * data to be writen to the failed disk. We first bxor the new data
- * into the old recovered data, then do the same things as small
- * write. */
-
- rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
- /* do new 'E' calculation */
- /* find out the corresponding colume in encoding matrix for write
- * colume to be encoded into redundant disk 'E' */
- scol = rf_EUCol(layoutPtr, fpda->raidAddress);
- /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
- * buffer pointer */
- rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
-
- /* do new 'P' calculation */
- rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
- /* Free the allocated buffer */
- RF_Free(olddata[0], numbytes);
- RF_Free(olddata[1], numbytes);
- RF_Free(buf, numDataCol * sizeof(char *));
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- if (tracerec) {
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
- }
- rf_GenericWakeupFunc(node, 0);
- return (0);
-}
-#endif /* RF_INCLUDE_EVENODD > 0 */
diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.h b/sys/dev/raidframe/rf_evenodd_dagfuncs.h
deleted file mode 100644
index cf5028b..0000000
--- a/sys/dev/raidframe/rf_evenodd_dagfuncs.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_evenodd_dagfuncs.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */
-/*
- * rf_evenodd_dagfuncs.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Chang-Ming Wu
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_EVENODD_DAGFUNCS_H_
-#define _RF__RF_EVENODD_DAGFUNCS_H_
-
-extern RF_RedFuncs_t rf_EOSmallWriteEFuncs;
-extern RF_RedFuncs_t rf_EOSmallWritePFuncs;
-extern RF_RedFuncs_t rf_eoERecoveryFuncs;
-extern RF_RedFuncs_t rf_eoPRecoveryFuncs;
-
-int rf_RegularPEFunc(RF_DagNode_t * node);
-int rf_RegularONEFunc(RF_DagNode_t * node);
-int rf_SimpleONEFunc(RF_DagNode_t * node);
-void rf_RegularESubroutine(RF_DagNode_t * node, char *ebuf);
-int rf_RegularEFunc(RF_DagNode_t * node);
-void rf_DegrESubroutine(RF_DagNode_t * node, char *ebuf);
-int rf_Degraded_100_EOFunc(RF_DagNode_t * node);
-void
-rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf,
- RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector);
-void
-rf_e_encToBuf(RF_Raid_t * raidPtr, RF_RowCol_t srcLogicCol,
- char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector);
-int rf_RecoveryEFunc(RF_DagNode_t * node);
-int rf_EO_DegradedWriteEFunc(RF_DagNode_t * node);
-void
-rf_doubleEOdecode(RF_Raid_t * raidPtr, char **rrdbuf, char **dest,
- RF_RowCol_t * fcol, char *pbuf, char *ebuf);
-int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t * node);
-int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t * node);
-
-#define rf_EUCol(_layoutPtr_, _addr_ ) \
-( (_addr_)%( (_layoutPtr_)->dataSectorsPerStripe ) )/((_layoutPtr_)->sectorsPerStripeUnit)
-
-#define rf_EO_Mod( _int1_, _int2_ ) \
-( ((_int1_) < 0)? (((_int1_)+(_int2_))%(_int2_)) : (_int1_)%(_int2_) )
-
-#define rf_OffsetOfNextEUBoundary(_offset_, sec_per_eu) ((_offset_)/(sec_per_eu) + 1)*(sec_per_eu)
-
-#define RF_EO_MATRIX_DIM 17
-
-/*
- * RF_EO_MATRIX_DIM should be a prime number: and "bytesPerSector" should be
- * dividable by ( RF_EO_MATRIX_DIM - 1) to fully encode and utilize the space
- * in a sector, this number could also be 17. Tha later case doesn't apply
- * for disk array larger than 17 columns totally.
- */
-
-#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */
diff --git a/sys/dev/raidframe/rf_evenodd_dags.c b/sys/dev/raidframe/rf_evenodd_dags.c
deleted file mode 100644
index cef32c2..0000000
--- a/sys/dev/raidframe/rf_evenodd_dags.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/* $NetBSD: rf_evenodd_dags.c,v 1.2 1999/02/05 00:06:11 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * rf_evenodd_dags.c
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Chang-Ming Wu
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_EVENODD > 0
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_evenodd_dags.h>
-#include <dev/raidframe/rf_evenodd.h>
-#include <dev/raidframe/rf_evenodd_dagfuncs.h>
-#include <dev/raidframe/rf_pq.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_dagffwr.h>
-
-
-/*
- * Lost one data.
- * Use P to reconstruct missing data.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG)
-{
- rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs);
-}
-/*
- * Lost data + E.
- * Use P to reconstruct missing data.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG)
-{
- rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs);
-}
-/*
- * Lost data + P.
- * Make E look like P, and use Eor for Xor, and we can
- * use degraded read DAG.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG)
-{
- RF_PhysDiskAddr_t *temp;
- /* swap P and E pointers to fake out the DegradedReadDAG code */
- temp = asmap->parityInfo;
- asmap->parityInfo = asmap->qInfo;
- asmap->qInfo = temp;
- rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoERecoveryFuncs);
-}
-/*
- * Lost two data.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG)
-{
- rf_EO_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList);
-}
-/*
- * Lost two data.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG)
-{
- rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG)
-{
- if (asmap->numStripeUnitsAccessed != 1 &&
- asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit)
- RF_PANIC();
- rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, (int (*) (RF_DagNode_t *)) rf_Degraded_100_EOFunc, RF_TRUE);
-}
-/*
- * E is dead. Small write.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG)
-{
- rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWritePFuncs, NULL);
-}
-/*
- * E is dead. Large write.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG)
-{
- rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_TRUE);
-}
-/*
- * P is dead. Small write.
- * Swap E + P, use single-degraded stuff.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG)
-{
- RF_PhysDiskAddr_t *temp;
- /* swap P and E pointers to fake out the DegradedReadDAG code */
- temp = asmap->parityInfo;
- asmap->parityInfo = asmap->qInfo;
- asmap->qInfo = temp;
- rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWriteEFuncs, NULL);
-}
-/*
- * P is dead. Large write.
- * Swap E + P, use single-degraded stuff.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG)
-{
- RF_PhysDiskAddr_t *temp;
- /* swap P and E pointers to fake out the code */
- temp = asmap->parityInfo;
- asmap->parityInfo = asmap->qInfo;
- asmap->qInfo = temp;
- rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularEFunc, RF_FALSE);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG)
-{
- rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
- RF_IO_TYPE_WRITE);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG)
-{
- RF_PhysDiskAddr_t *temp;
-
- if (asmap->numStripeUnitsAccessed != 1 &&
- asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) {
- RF_PANIC();
- }
- /* swap P and E to fake out parity code */
- temp = asmap->parityInfo;
- asmap->parityInfo = asmap->qInfo;
- asmap->qInfo = temp;
- rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, (int (*) (RF_DagNode_t *)) rf_EO_DegradedWriteEFunc, RF_FALSE);
- /* is the regular E func the right one to call? */
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG)
-{
- if (asmap->numStripeUnitsAccessed != 1 &&
- asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit)
- RF_PANIC();
- rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead)
-{
- rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList,
- "Re", "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG)
-{
- rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG)
-{
- rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, rf_RegularPEFunc, RF_FALSE);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG)
-{
- rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc);
-}
-#endif /* RF_INCLUDE_EVENODD > 0 */
diff --git a/sys/dev/raidframe/rf_evenodd_dags.h b/sys/dev/raidframe/rf_evenodd_dags.h
deleted file mode 100644
index c4218a4..0000000
--- a/sys/dev/raidframe/rf_evenodd_dags.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_evenodd_dags.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */
-/*
- * rf_evenodd_dags.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Chang-Ming Wu
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_EVENODD_DAGS_H_
-#define _RF__RF_EVENODD_DAGS_H_
-
-#include <dev/raidframe/rf_types.h>
-
-#if RF_UTILITY == 0
-#include <dev/raidframe/rf_dag.h>
-
-/* extern decl's of the failure mode EO functions.
- * swiped from rf_pqdeg.h
- */
-
-RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead);
-RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG);
-#endif /* RF_UTILITY == 0 */
-
-#endif /* !_RF__RF_EVENODD_DAGS_H_ */
diff --git a/sys/dev/raidframe/rf_fifo.c b/sys/dev/raidframe/rf_fifo.c
deleted file mode 100644
index d5ce0d0..0000000
--- a/sys/dev/raidframe/rf_fifo.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/* $NetBSD: rf_fifo.c,v 1.5 2000/03/04 03:27:13 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/***************************************************
- *
- * rf_fifo.c -- prioritized fifo queue code.
- * There are only two priority levels: hi and lo.
- *
- * Aug 4, 1994, adapted from raidSim version (MCH)
- *
- ***************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_stripelocks.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_fifo.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_types.h>
-
-/* just malloc a header, zero it (via calloc), and return it */
-/*ARGSUSED*/
-void *
-rf_FifoCreate(sectPerDisk, clList, listp)
- RF_SectorCount_t sectPerDisk;
- RF_AllocListElem_t *clList;
- RF_ShutdownList_t **listp;
-{
- RF_FifoHeader_t *q;
-
- RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList);
- q->hq_count = q->lq_count = 0;
- return ((void *) q);
-}
-
-void
-rf_FifoEnqueue(q_in, elem, priority)
- void *q_in;
- RF_DiskQueueData_t *elem;
- int priority;
-{
- RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
-
- RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY);
-
- elem->next = NULL;
- if (priority == RF_IO_NORMAL_PRIORITY) {
- if (!q->hq_tail) {
- RF_ASSERT(q->hq_count == 0 && q->hq_head == NULL);
- q->hq_head = q->hq_tail = elem;
- } else {
- RF_ASSERT(q->hq_count != 0 && q->hq_head != NULL);
- q->hq_tail->next = elem;
- q->hq_tail = elem;
- }
- q->hq_count++;
- } else {
- RF_ASSERT(elem->next == NULL);
- if (rf_fifoDebug) {
- printf("raid%d: fifo: ENQ lopri\n",
- elem->raidPtr->raidid);
- }
- if (!q->lq_tail) {
- RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL);
- q->lq_head = q->lq_tail = elem;
- } else {
- RF_ASSERT(q->lq_count != 0 && q->lq_head != NULL);
- q->lq_tail->next = elem;
- q->lq_tail = elem;
- }
- q->lq_count++;
- }
- if ((q->hq_count + q->lq_count) != elem->queue->queueLength) {
- printf("Queue lengths differ!: %d %d %d\n",
- q->hq_count, q->lq_count, (int) elem->queue->queueLength);
- printf("%d %d %d %d\n",
- (int) elem->queue->numOutstanding,
- (int) elem->queue->maxOutstanding,
- (int) elem->queue->row,
- (int) elem->queue->col);
- }
- RF_ASSERT((q->hq_count + q->lq_count) == elem->queue->queueLength);
-}
-
-RF_DiskQueueData_t *
-rf_FifoDequeue(q_in)
- void *q_in;
-{
- RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
- RF_DiskQueueData_t *nd;
-
- RF_ASSERT(q);
- if (q->hq_head) {
- RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL);
- nd = q->hq_head;
- q->hq_head = q->hq_head->next;
- if (!q->hq_head)
- q->hq_tail = NULL;
- nd->next = NULL;
- q->hq_count--;
- } else
- if (q->lq_head) {
- RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL);
- nd = q->lq_head;
- q->lq_head = q->lq_head->next;
- if (!q->lq_head)
- q->lq_tail = NULL;
- nd->next = NULL;
- q->lq_count--;
- if (rf_fifoDebug) {
- printf("raid%d: fifo: DEQ lopri %lx\n",
- nd->raidPtr->raidid, (long) nd);
- }
- } else {
- RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL);
- nd = NULL;
- }
- return (nd);
-}
-
-/* Return ptr to item at head of queue. Used to examine request
- * info without actually dequeueing the request.
- */
-RF_DiskQueueData_t *
-rf_FifoPeek(void *q_in)
-{
- RF_DiskQueueData_t *headElement = NULL;
- RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
-
- RF_ASSERT(q);
- if (q->hq_head)
- headElement = q->hq_head;
- else
- if (q->lq_head)
- headElement = q->lq_head;
- return (headElement);
-}
-/* We sometimes need to promote a low priority access to a regular priority access.
- * Currently, this is only used when the user wants to write a stripe which is currently
- * under reconstruction.
- * This routine will promote all accesses tagged with the indicated parityStripeID from
- * the low priority queue to the end of the normal priority queue.
- * We assume the queue is locked upon entry.
- */
-int
-rf_FifoPromote(q_in, parityStripeID, which_ru)
- void *q_in;
- RF_StripeNum_t parityStripeID;
- RF_ReconUnitNum_t which_ru;
-{
- RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
- RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; /* lp = lo-pri queue
- * pointer, pt = trailer */
- int retval = 0;
-
- while (lp) {
-
- /* search for the indicated parity stripe in the low-pri queue */
- if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) {
- /* printf("FifoPromote: promoting access for psid
- * %ld\n",parityStripeID); */
- if (pt)
- pt->next = lp->next; /* delete an entry other
- * than the first */
- else
- q->lq_head = lp->next; /* delete the head entry */
-
- if (!q->lq_head)
- q->lq_tail = NULL; /* we deleted the only
- * entry */
- else
- if (lp == q->lq_tail)
- q->lq_tail = pt; /* we deleted the tail
- * entry */
-
- lp->next = NULL;
- q->lq_count--;
-
- if (q->hq_tail) {
- q->hq_tail->next = lp;
- q->hq_tail = lp;
- }
- /* append to hi-priority queue */
- else {
- q->hq_head = q->hq_tail = lp;
- }
- q->hq_count++;
-
- /* UpdateShortestSeekFinishTimeForced(lp->requestPtr,
- * lp->diskState); *//* deal with this later, if ever */
-
- lp = (pt) ? pt->next : q->lq_head; /* reset low-pri pointer
- * and continue */
- retval++;
-
- } else {
- pt = lp;
- lp = lp->next;
- }
- }
-
- /* sanity check. delete this if you ever put more than one entry in
- * the low-pri queue */
- RF_ASSERT(retval == 0 || retval == 1);
- return (retval);
-}
diff --git a/sys/dev/raidframe/rf_fifo.h b/sys/dev/raidframe/rf_fifo.h
deleted file mode 100644
index 9392f08..0000000
--- a/sys/dev/raidframe/rf_fifo.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_fifo.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_fifo.h -- prioritized FIFO queue code.
- *
- * 4-9-93 Created (MCH)
- */
-
-
-#ifndef _RF__RF_FIFO_H_
-#define _RF__RF_FIFO_H_
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_diskqueue.h>
-
-typedef struct RF_FifoHeader_s {
- RF_DiskQueueData_t *hq_head, *hq_tail; /* high priority requests */
- RF_DiskQueueData_t *lq_head, *lq_tail; /* low priority requests */
- int hq_count, lq_count; /* debug only */
-} RF_FifoHeader_t;
-
-extern void *
-rf_FifoCreate(RF_SectorCount_t sectPerDisk,
- RF_AllocListElem_t * clList, RF_ShutdownList_t ** listp);
-extern void
-rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t * elem,
- int priority);
-extern RF_DiskQueueData_t *rf_FifoDequeue(void *q_in);
-extern RF_DiskQueueData_t *rf_FifoPeek(void *q_in);
-extern int
-rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru);
-
-#endif /* !_RF__RF_FIFO_H_ */
diff --git a/sys/dev/raidframe/rf_freebsdkintf.c b/sys/dev/raidframe/rf_freebsdkintf.c
deleted file mode 100644
index 13f5abb..0000000
--- a/sys/dev/raidframe/rf_freebsdkintf.c
+++ /dev/null
@@ -1,3192 +0,0 @@
-/*-
- * Copyright (c) 2002 Scott Long <scottl@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-/* $NetBSD: rf_netbsdkintf.c,v 1.105 2001/04/05 02:48:51 oster Exp $ */
-/*-
- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Greg Oster; Jason R. Thorpe.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 1988 University of Utah.
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: Utah $Hdr: cd.c 1.6 90/11/28$
- *
- * @(#)cd.c 8.2 (Berkeley) 11/16/93
- */
-
-
-
-
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Mark Holland, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/***********************************************************
- *
- * rf_kintf.c -- the kernel interface routines for RAIDframe
- *
- ***********************************************************/
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/errno.h>
-#include <sys/param.h>
-#include <sys/queue.h>
-#include <sys/stat.h>
-#include <sys/ioccom.h>
-#include <sys/filio.h>
-#include <sys/filedesc.h>
-#include <sys/fcntl.h>
-#include <sys/systm.h>
-#include <sys/namei.h>
-#include <sys/vnode.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/conf.h>
-#include <sys/disk.h>
-#include <sys/lock.h>
-#include <sys/reboot.h>
-#include <sys/module.h>
-#include <vm/uma.h>
-#include <geom/geom_disk.h>
-
-#include "opt_raid.h"
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_raidframe.h>
-#include <dev/raidframe/rf_copyback.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagflags.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_kintf.h>
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_driver.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_configure.h>
-
-RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
-
-static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
- * spare table */
-static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
- * installation process */
-
-/* prototypes */
-static void KernelWakeupFunc(struct bio *);
-static void InitBP(struct bio *, struct vnode *, unsigned rw_flag,
- dev_t dev, RF_SectorNum_t startSect,
- RF_SectorCount_t numSect, caddr_t buf,
- void (*cbFunc) (struct bio *), void *cbArg,
- int logBytesPerSector, struct proc * b_proc);
-static struct raid_softc *raidinit(RF_Raid_t *);
-static void rf_search_label(dev_t, struct disklabel *,
- RF_AutoConfig_t **) __unused;
-
-static int raid_modevent(module_t, int, void*);
-void raidattach(void);
-
-disk_open_t raidopen;
-disk_close_t raidclose;
-disk_ioctl_t raidioctl;
-disk_strategy_t raidstrategy;
-
-d_open_t raidctlopen;
-d_close_t raidctlclose;
-d_ioctl_t raidctlioctl;
-
-static struct cdevsw raidctl_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDGIANT,
- .d_open = raidctlopen,
- .d_close = raidctlclose,
- .d_ioctl = raidctlioctl,
- .d_name = "raidctl",
-};
-
-/*
- * Pilfered from ccd.c
- */
-
-struct raidbuf {
- struct bio rf_buf; /* new I/O buf. MUST BE FIRST!!! */
- struct bio *rf_obp; /* ptr. to original I/O buf */
- int rf_flags; /* misc. flags */
- RF_DiskQueueData_t *req;/* the request that this was part of.. */
-};
-
-
-#define RAIDGETBUF(sc) uma_zalloc((sc)->sc_cbufpool, M_NOWAIT)
-#define RAIDPUTBUF(sc, cbp) uma_zfree((sc)->sc_cbufpool, cbp)
-
-#define RF_MAX_ARRAYS 32
-
-/* Raid control device */
-struct raidctl_softc {
- dev_t sc_dev; /* Device node */
- int sc_flags; /* flags */
- int sc_numraid; /* Number of configured raid devices */
- struct raid_softc *sc_raiddevs[RF_MAX_ARRAYS];
-};
-
-struct raid_softc {
- dev_t sc_parent_dev;
- int sc_flags; /* flags */
- int sc_busycount; /* How many times are we opened? */
- size_t sc_size; /* size of the raid device */
- dev_t sc_parent; /* Parent device */
- struct disk *sc_disk; /* generic disk device info */
- uma_zone_t sc_cbufpool; /* component buffer pool */
- RF_Raid_t *raidPtr; /* Raid information struct */
- struct bio_queue_head bio_queue; /* used for the device queue */
-};
-/* sc_flags */
-#define RAIDF_OPEN 0x01 /* unit has been initialized */
-#define RAIDF_WLABEL 0x02 /* label area is writable */
-#define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
-#define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
-#define RAIDF_LOCKED 0x80 /* unit is locked */
-
-/*
- * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
- * Be aware that large numbers can allow the driver to consume a lot of
- * kernel memory, especially on writes, and in degraded mode reads.
- *
- * For example: with a stripe width of 64 blocks (32k) and 5 disks,
- * a single 64K write will typically require 64K for the old data,
- * 64K for the old parity, and 64K for the new parity, for a total
- * of 192K (if the parity buffer is not re-used immediately).
- * Even it if is used immedately, that's still 128K, which when multiplied
- * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
- *
- * Now in degraded mode, for example, a 64K read on the above setup may
- * require data reconstruction, which will require *all* of the 4 remaining
- * disks to participate -- 4 * 32K/disk == 128K again.
- */
-
-#ifndef RAIDOUTSTANDING
-#define RAIDOUTSTANDING 10
-#endif
-
-static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disk*);
-static int raidlock(struct raid_softc *);
-static void raidunlock(struct raid_softc *);
-
-static void rf_markalldirty(RF_Raid_t *);
-
-static dev_t raidctl_dev;
-
-void rf_ReconThread(struct rf_recon_req *);
-/* XXX what I want is: */
-/*void rf_ReconThread(RF_Raid_t *raidPtr); */
-void rf_RewriteParityThread(RF_Raid_t *raidPtr);
-void rf_CopybackThread(RF_Raid_t *raidPtr);
-void rf_ReconstructInPlaceThread(struct rf_recon_req *);
-void rf_buildroothack(void *, struct raidctl_softc *);
-
-RF_AutoConfig_t *rf_find_raid_components(void);
-RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
-static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
-static int rf_reasonable_label(RF_ComponentLabel_t *);
-void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
-int rf_set_autoconfig(RF_Raid_t *, int);
-int rf_set_rootpartition(RF_Raid_t *, int);
-void rf_release_all_vps(RF_ConfigSet_t *);
-void rf_cleanup_config_set(RF_ConfigSet_t *);
-int rf_have_enough_components(RF_ConfigSet_t *);
-int rf_auto_config_set(RF_ConfigSet_t *, int *, struct raidctl_softc *);
-static int raidgetunit(struct raidctl_softc *, int);
-static int raidshutdown(void);
-
-void
-raidattach(void)
-{
- struct raidctl_softc *parent_sc = NULL;
- RF_AutoConfig_t *ac_list; /* autoconfig list */
- RF_ConfigSet_t *config_sets;
- int autoconfig = 0;
-
- /* This is where all the initialization stuff gets done. */
-
- if(rf_mutex_init(&rf_sparet_wait_mutex, __FUNCTION__)) {
- rf_printf(0, "RAIDframe: failed to initialize mutexes\n");
- return;
- }
-
- rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
-
- if (rf_BootRaidframe() != 0) {
- rf_printf(0, "Serious error booting RAIDframe!!\n");
- return;
- }
-
- rf_printf(0, "Kernelized RAIDframe activated\n");
- MALLOC(parent_sc, struct raidctl_softc *, sizeof(*parent_sc),
- M_RAIDFRAME, M_NOWAIT|M_ZERO);
- if (parent_sc == NULL) {
- RF_PANIC();
- return;
- }
-
- parent_sc->sc_dev= make_dev(&raidctl_cdevsw, 0, UID_ROOT, GID_WHEEL,
- 0600, "raidctl");
- parent_sc->sc_dev->si_drv1 = parent_sc;
- raidctl_dev = parent_sc->sc_dev;
-
-#if RAID_AUTOCONFIG
- autoconfig = 1;
-#endif
-
- if (autoconfig) {
- /* 1. locate all RAID components on the system */
-
- rf_printf(0, "Searching for raid components...\n");
- ac_list = rf_find_raid_components();
- if (ac_list == NULL)
- return;
-
- /* 2. sort them into their respective sets */
-
- config_sets = rf_create_auto_sets(ac_list);
-
- /* 3. evaluate each set and configure the valid ones
- This gets done in rf_buildroothack() */
-
- /* schedule the creation of the thread to do the
- "/ on RAID" stuff */
-
- rf_buildroothack(config_sets, parent_sc);
-#if 0
- kthread_create(rf_buildroothack,config_sets);
-
-#endif /* RAID_AUTOCONFIG */
- }
-}
-
-void
-rf_buildroothack(arg, parent_sc)
- void *arg;
- struct raidctl_softc *parent_sc;
-{
- RF_ConfigSet_t *config_sets = arg;
- RF_ConfigSet_t *cset;
- RF_ConfigSet_t *next_cset;
- int retcode;
- int raidID;
- int rootID;
- int num_root;
-
- rootID = 0;
- num_root = 0;
- cset = config_sets;
- while(cset != NULL ) {
- next_cset = cset->next;
- if (rf_have_enough_components(cset) &&
- cset->ac->clabel->autoconfigure==1) {
- retcode = rf_auto_config_set(cset, &raidID, parent_sc);
- if (!retcode) {
- if (cset->rootable) {
- rootID = raidID;
- num_root++;
- }
- } else {
- /* The autoconfig didn't work :( */
- rf_printf(1, "Autoconfig failed with code %d"
- "for raid%d\n", retcode, raidID);
- rf_release_all_vps(cset);
- }
- } else {
- /* we're not autoconfiguring this set...
- release the associated resources */
- rf_release_all_vps(cset);
- }
- /* cleanup */
- rf_cleanup_config_set(cset);
- cset = next_cset;
- }
- if (boothowto & RB_ASKNAME) {
- /* We don't auto-config... */
- } else {
- /* They didn't ask, and we found something bootable... */
-
-#if 0
- if (num_root == 1) {
- booted_device = &raidrootdev[rootID];
- } else if (num_root > 1) {
- /* we can't guess.. require the user to answer... */
- boothowto |= RB_ASKNAME;
- }
-#endif
- }
-}
-
-int
-raidctlopen(dev_t dev, int flags, int fmt, struct thread *td)
-{
- struct raidctl_softc *parent_sc;
-
- parent_sc = dev->si_drv1;
-
- if ((parent_sc->sc_flags & RAIDF_OPEN) != 0)
- return (EBUSY);
-
- parent_sc->sc_flags |= RAIDF_OPEN;
- return (0);
-}
-
-int
-raidctlclose(dev_t dev, int flags, int fmt, struct thread *td)
-{
- struct raidctl_softc *parent_sc;
-
- parent_sc = dev->si_drv1;
-
- parent_sc->sc_flags &= ~RAIDF_OPEN;
- return (0);
-}
-
-int
-raidctlioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
-{
- struct raidctl_softc *parent_sc;
- struct raid_softc *sc;
- RF_Config_t *u_cfg, *k_cfg;
- RF_Raid_t *raidPtr;
- u_char *specific_buf;
- u_int unit;
- int retcode = 0;
-
- parent_sc = dev->si_drv1;
-
- switch (cmd) {
- /* configure the system */
- case RAIDFRAME_CONFIGURE:
-
- /* copy-in the configuration information */
- /* data points to a pointer to the configuration structure */
-
- u_cfg = *((RF_Config_t **) data);
- RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
- if (k_cfg == NULL) {
- return (ENOMEM);
- }
- retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
- sizeof(RF_Config_t));
- if (retcode) {
- RF_Free(k_cfg, sizeof(RF_Config_t));
- rf_printf(2, "raidctlioctl: retcode=%d copyin.1\n",
- retcode);
- return (retcode);
- }
- /* allocate a buffer for the layout-specific data, and copy it
- * in */
- if (k_cfg->layoutSpecificSize) {
- if (k_cfg->layoutSpecificSize > 10000) {
- /* sanity check */
- RF_Free(k_cfg, sizeof(RF_Config_t));
- return (EINVAL);
- }
- RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
- (u_char *));
- if (specific_buf == NULL) {
- RF_Free(k_cfg, sizeof(RF_Config_t));
- return (ENOMEM);
- }
- retcode = copyin(k_cfg->layoutSpecific,
- (caddr_t) specific_buf,
- k_cfg->layoutSpecificSize);
- if (retcode) {
- RF_Free(specific_buf,
- k_cfg->layoutSpecificSize);
- RF_Free(k_cfg, sizeof(RF_Config_t));
- rf_printf(2, "raidctlioctl: retcode=%d "
- "copyin.2\n", retcode);
- return (retcode);
- }
- } else
- specific_buf = NULL;
- k_cfg->layoutSpecific = specific_buf;
-
- /* should do some kind of sanity check on the configuration.
- * Store the sum of all the bytes in the last byte? */
-
- /* configure the system */
-
- RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *));
- if (raidPtr == NULL) {
- rf_printf(0, "No memory for raid device\n");
- RF_Free(k_cfg, sizeof(RF_Config_t));
- retcode = ENOMEM;
- }
- bzero((char *) raidPtr, sizeof(RF_Raid_t));
-
- /* Request a unit number for this soon-to-be device. */
- unit = raidgetunit(parent_sc, 0);
- if (unit == -1) {
- rf_printf(0, "Cannot allocate raid unit\n");
- RF_Free(raidPtr, sizeof(*raidPtr));
- goto out;
- }
- raidPtr->raidid = unit;
-
- if ((retcode = rf_Configure(raidPtr, k_cfg, NULL)) == 0) {
-
- /* allow this many simultaneous IO's to
- this RAID device */
- raidPtr->openings = RAIDOUTSTANDING;
-
- parent_sc->sc_raiddevs[unit] = raidinit(raidPtr);
- if (parent_sc->sc_raiddevs[unit] == NULL) {
- rf_printf(0, "Could not create raid device\n");
- RF_Free(raidPtr, sizeof(*raidPtr));
- goto out;
- }
- parent_sc->sc_numraid++;
- ((struct raid_softc *)raidPtr->sc)->sc_parent_dev = dev;
- rf_markalldirty(raidPtr);
- } else {
- parent_sc->sc_raiddevs[unit] = NULL;
- RF_Free(raidPtr, sizeof(*raidPtr));
- }
-
-out:
- /* free the buffers. No return code here. */
- if (k_cfg->layoutSpecificSize) {
- RF_Free(specific_buf, k_cfg->layoutSpecificSize);
- }
- RF_Free(k_cfg, sizeof(RF_Config_t));
- break;
-
- case RAIDFRAME_SHUTDOWN:
-
- unit = *(u_int *)data;
- if ((unit >= RF_MAX_ARRAYS) ||
- (parent_sc->sc_raiddevs[unit] == NULL))
- return (EINVAL);
-
- sc = parent_sc->sc_raiddevs[unit];
- if ((retcode = raidlock(sc)) != 0)
- return (retcode);
-
- /*
- * If somebody has a partition mounted, we shouldn't
- * shutdown.
- */
-
- if ((sc->sc_flags & RAIDF_OPEN) != 0) {
- raidunlock(sc);
- return (EBUSY);
- }
-
- rf_printf(0, "Shutting down RAIDframe engine\n");
- retcode = rf_Shutdown(sc->raidPtr);
- RF_THREADGROUP_WAIT_STOP(&sc->raidPtr->engine_tg);
-
- disk_destroy(sc->sc_disk);
- raidunlock(sc);
-
- /* XXX Need to be able to destroy the zone */
- uma_zdestroy(sc->sc_cbufpool);
-
- parent_sc->sc_numraid--;
- parent_sc->sc_raiddevs[unit] = NULL;
-
- RF_Free(sc->raidPtr, sizeof(*raidPtr));
- RF_Free(sc, sizeof(*sc));
-
- break;
-
- default:
- retcode = ENOIOCTL;
- }
-
- return (retcode);
-}
-
-/* ARGSUSED */
-int
-raidopen(struct disk *dp)
-{
- struct raid_softc *sc;
- int error = 0;
-
- sc = dp->d_drv1;
-
- if ((error = raidlock(sc)) != 0)
- return (error);
- dp = sc->sc_disk;
-
- rf_printf(1, "Opening raid device %s%d\n", dp->d_name, dp->d_unit);
-
- /* Generate overall disklabel */
- raidgetdefaultlabel(sc->raidPtr, sc, dp);
-
- if (sc->sc_busycount == 0) {
- /* First one... mark things as dirty... Note that we *MUST*
- have done a configure before this. I DO NOT WANT TO BE
- SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
- THAT THEY BELONG TOGETHER!!!!! */
- /* XXX should check to see if we're only open for reading
- here... If so, we needn't do this, but then need some
- other way of keeping track of what's happened.. */
-
- rf_markalldirty( sc->raidPtr );
- sc->sc_flags |= RAIDF_OPEN;
- }
-
- /* Prevent this unit from being unconfigured while open. */
- sc->sc_busycount++;
-
- raidunlock(sc);
-
- return (error);
-
-
-}
-/* ARGSUSED */
-int
-raidclose(struct disk *dp)
-{
- struct raid_softc *sc;
- int error = 0;
-
- sc = dp->d_drv1;
-
- if ((error = raidlock(sc)) != 0)
- return (error);
-
- sc->sc_busycount--;
- if (sc->sc_busycount == 0) {
- sc->sc_flags &= ~RAIDF_OPEN;
- rf_update_component_labels(sc->raidPtr,
- RF_FINAL_COMPONENT_UPDATE);
- }
-
- raidunlock(sc);
- return (0);
-
-}
-
-void
-raidstrategy(bp)
- struct bio *bp;
-{
- RF_Raid_t *raidPtr;
- struct raid_softc *sc = bp->bio_disk->d_drv1;
- int s;
-
- raidPtr = sc->raidPtr;
- if (raidPtr == NULL) {
- bp->bio_error = ENODEV;
- bp->bio_flags |= BIO_ERROR;
- bp->bio_resid = bp->bio_bcount;
- biodone(bp);
- return;
- }
- if (!raidPtr->valid) {
- bp->bio_error = ENODEV;
- bp->bio_flags |= BIO_ERROR;
- bp->bio_resid = bp->bio_bcount;
- biodone(bp);
- return;
- }
- if (bp->bio_bcount == 0) {
- rf_printf(2, "b_bcount is zero..\n");
- biodone(bp);
- return;
- }
-
- s = splbio();
-
- bp->bio_resid = 0;
-
- /* stuff it onto our queue. XXX locking? */
- bioq_insert_tail(&sc->bio_queue, bp);
-
- raidstart(raidPtr);
-
- splx(s);
-}
-
-int
-raidioctl(dp, cmd, data, flag, td)
- struct disk *dp;
- u_long cmd;
- void *data;
- int flag;
- struct thread *td;
-{
- struct raid_softc *sc;
- RF_Raid_t *raidPtr;
- RF_RaidDisk_t *diskPtr;
- RF_AccTotals_t *totals;
- RF_DeviceConfig_t *d_cfg, **ucfgp;
- struct rf_recon_req *rrcopy, *rr;
- RF_ComponentLabel_t *clabel;
- RF_ComponentLabel_t *ci_label;
- RF_SingleComponent_t *sparePtr,*componentPtr;
- RF_SingleComponent_t *hot_spare, *component;
- RF_ProgressInfo_t progressInfo;
- int retcode = 0;
- int row, column;
- int unit;
- int i, j, d;
-
- sc = dp->d_drv1;
- raidPtr = sc->raidPtr;
-
- rf_printf(2, "raidioctl: %s%d %ld\n", dp->d_name, dp->d_unit, cmd);
-
- switch (cmd) {
-
- case RAIDFRAME_GET_COMPONENT_LABEL:
- /* need to read the component label for the disk indicated
- by row,column in clabel */
-
- /* For practice, let's get it directly fromdisk, rather
- than from the in-core copy */
- RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
- (RF_ComponentLabel_t *));
- if (clabel == NULL)
- return (ENOMEM);
-
- bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
-
- bcopy(data, clabel, sizeof(RF_ComponentLabel_t));
-
- row = clabel->row;
- column = clabel->column;
-
- if ((row < 0) || (row >= raidPtr->numRow) ||
- (column < 0) || (column >= raidPtr->numCol +
- raidPtr->numSpare)) {
- RF_Free( clabel, sizeof(RF_ComponentLabel_t));
- return(EINVAL);
- }
-
- raidread_component_label(raidPtr->Disks[row][column].dev,
- raidPtr->raid_cinfo[row][column].ci_vp,
- clabel );
-
- bcopy(clabel, data, sizeof(RF_ComponentLabel_t));
- RF_Free( clabel, sizeof(RF_ComponentLabel_t));
- return (retcode);
-
- case RAIDFRAME_SET_COMPONENT_LABEL:
- clabel = (RF_ComponentLabel_t *) data;
-
- /* XXX check the label for valid stuff... */
- /* Note that some things *should not* get modified --
- the user should be re-initing the labels instead of
- trying to patch things.
- */
-
- rf_printf(1, "Got component label:\n");
- rf_printf(1, "Version: %d\n",clabel->version);
- rf_printf(1, "Serial Number: %d\n",clabel->serial_number);
- rf_printf(1, "Mod counter: %d\n",clabel->mod_counter);
- rf_printf(1, "Row: %d\n", clabel->row);
- rf_printf(1, "Column: %d\n", clabel->column);
- rf_printf(1, "Num Rows: %d\n", clabel->num_rows);
- rf_printf(1, "Num Columns: %d\n", clabel->num_columns);
- rf_printf(1, "Clean: %d\n", clabel->clean);
- rf_printf(1, "Status: %d\n", clabel->status);
-
- row = clabel->row;
- column = clabel->column;
-
- if ((row < 0) || (row >= raidPtr->numRow) ||
- (column < 0) || (column >= raidPtr->numCol)) {
- return(EINVAL);
- }
-
- /* XXX this isn't allowed to do anything for now :-) */
-
- /* XXX and before it is, we need to fill in the rest
- of the fields!?!?!?! */
-#if 0
- raidwrite_component_label(
- raidPtr->Disks[row][column].dev,
- raidPtr->raid_cinfo[row][column].ci_vp,
- clabel );
-#endif
- return (0);
-
- case RAIDFRAME_INIT_LABELS:
- MALLOC(ci_label, RF_ComponentLabel_t *,
- sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
- M_WAITOK | M_ZERO);
- clabel = (RF_ComponentLabel_t *) data;
- /*
- we only want the serial number from
- the above. We get all the rest of the information
- from the config that was used to create this RAID
- set.
- */
-
- raidPtr->serial_number = clabel->serial_number;
-
- raid_init_component_label(raidPtr, ci_label);
- ci_label->serial_number = clabel->serial_number;
-
- for(row=0;row<raidPtr->numRow;row++) {
- ci_label->row = row;
- for(column=0;column<raidPtr->numCol;column++) {
- diskPtr = &raidPtr->Disks[row][column];
- if (!RF_DEAD_DISK(diskPtr->status)) {
- ci_label->partitionSize =
- diskPtr->partitionSize;
- ci_label->column = column;
- raidwrite_component_label(
- raidPtr->Disks[row][column].dev,
- raidPtr->raid_cinfo[row][column].ci_vp,
- ci_label );
- }
- }
- }
-
- FREE(ci_label, M_RAIDFRAME);
- return (retcode);
- case RAIDFRAME_SET_AUTOCONFIG:
- d = rf_set_autoconfig(raidPtr, *(int *) data);
- rf_printf(1, "New autoconfig value is: %d\n", d);
- *(int *) data = d;
- return (retcode);
-
- case RAIDFRAME_SET_ROOT:
- d = rf_set_rootpartition(raidPtr, *(int *) data);
- rf_printf(1, "New rootpartition value is: %d\n", d);
- *(int *) data = d;
- return (retcode);
-
- /* initialize all parity */
- case RAIDFRAME_REWRITEPARITY:
-
- if (raidPtr->Layout.map->faultsTolerated == 0) {
- /* Parity for RAID 0 is trivially correct */
- raidPtr->parity_good = RF_RAID_CLEAN;
- return(0);
- }
-
- if (raidPtr->parity_rewrite_in_progress == 1) {
- /* Re-write is already in progress! */
- return(EINVAL);
- }
-
- retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
- rf_RewriteParityThread,
- raidPtr,"raid_parity");
- return (retcode);
-
-
- case RAIDFRAME_ADD_HOT_SPARE:
- MALLOC(hot_spare, RF_SingleComponent_t *,
- sizeof(RF_SingleComponent_t), M_RAIDFRAME,
- M_WAITOK | M_ZERO);
- sparePtr = (RF_SingleComponent_t *) data;
- memcpy( hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
- retcode = rf_add_hot_spare(raidPtr, hot_spare);
- FREE(hot_spare, M_RAIDFRAME);
- return(retcode);
-
- case RAIDFRAME_REMOVE_HOT_SPARE:
- return(retcode);
-
- case RAIDFRAME_DELETE_COMPONENT:
- MALLOC(component, RF_SingleComponent_t *,
- sizeof(RF_SingleComponent_t), M_RAIDFRAME,
- M_WAITOK | M_ZERO);
- componentPtr = (RF_SingleComponent_t *)data;
- memcpy( component, componentPtr,
- sizeof(RF_SingleComponent_t));
- retcode = rf_delete_component(raidPtr, component);
- FREE(component, M_RAIDFRAME);
- return(retcode);
-
- case RAIDFRAME_INCORPORATE_HOT_SPARE:
- MALLOC(component, RF_SingleComponent_t *,
- sizeof(RF_SingleComponent_t), M_RAIDFRAME,
- M_WAITOK | M_ZERO);
- componentPtr = (RF_SingleComponent_t *)data;
- memcpy( component, componentPtr,
- sizeof(RF_SingleComponent_t));
- retcode = rf_incorporate_hot_spare(raidPtr, component);
- FREE(component, M_RAIDFRAME);
- return(retcode);
-
- case RAIDFRAME_REBUILD_IN_PLACE:
-
- MALLOC(component, RF_SingleComponent_t *,
- sizeof(RF_SingleComponent_t), M_RAIDFRAME,
- M_WAITOK | M_ZERO);
- if (raidPtr->Layout.map->faultsTolerated == 0) {
- /* Can't do this on a RAID 0!! */
- FREE(component, M_RAIDFRAME);
- return(EINVAL);
- }
-
- if (raidPtr->recon_in_progress == 1) {
- /* a reconstruct is already in progress! */
- FREE(component, M_RAIDFRAME);
- return(EINVAL);
- }
-
- componentPtr = (RF_SingleComponent_t *) data;
- memcpy( component, componentPtr,
- sizeof(RF_SingleComponent_t));
- row = component->row;
- column = component->column;
- unit = raidPtr->raidid;
- rf_printf(0, "raid%d Rebuild: %d %d\n", unit, row, column);
- if ((row < 0) || (row >= raidPtr->numRow) ||
- (column < 0) || (column >= raidPtr->numCol)) {
- FREE(component, M_RAIDFRAME);
- return(EINVAL);
- }
-
- RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
- if (rrcopy == NULL) {
- FREE(component, M_RAIDFRAME);
- return(ENOMEM);
- }
-
- rrcopy->raidPtr = (void *) raidPtr;
- rrcopy->row = row;
- rrcopy->col = column;
-
- retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
- rf_ReconstructInPlaceThread,
- rrcopy,"raid_reconip");
- FREE(component, M_RAIDFRAME);
- return(retcode);
-
- case RAIDFRAME_GET_UNIT:
-
- *(int *)data = raidPtr->raidid;
- return (0);
-
- case RAIDFRAME_GET_INFO:
- if (!raidPtr->valid)
- return (ENODEV);
- ucfgp = (RF_DeviceConfig_t **) data;
- RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
- (RF_DeviceConfig_t *));
- if (d_cfg == NULL)
- return (ENOMEM);
- bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
- d_cfg->rows = raidPtr->numRow;
- d_cfg->cols = raidPtr->numCol;
- d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
- if (d_cfg->ndevs >= RF_MAX_DISKS) {
- RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
- return (ENOMEM);
- }
- d_cfg->nspares = raidPtr->numSpare;
- if (d_cfg->nspares >= RF_MAX_DISKS) {
- RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
- return (ENOMEM);
- }
- d_cfg->maxqdepth = raidPtr->maxQueueDepth;
- d = 0;
- for (i = 0; i < d_cfg->rows; i++) {
- for (j = 0; j < d_cfg->cols; j++) {
- d_cfg->devs[d] = raidPtr->Disks[i][j];
- d++;
- }
- }
- for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
- d_cfg->spares[i] = raidPtr->Disks[0][j];
- }
-
- retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
-
- RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
-
- return (retcode);
-
- case RAIDFRAME_CHECK_PARITY:
- *(int *) data = raidPtr->parity_good;
- return (0);
-
- case RAIDFRAME_RESET_ACCTOTALS:
- bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
- return (0);
-
- case RAIDFRAME_GET_ACCTOTALS:
- totals = (RF_AccTotals_t *) data;
- *totals = raidPtr->acc_totals;
- return (0);
-
- case RAIDFRAME_KEEP_ACCTOTALS:
- raidPtr->keep_acc_totals = *(int *)data;
- return (0);
-
- case RAIDFRAME_GET_SIZE:
- *(int *) data = raidPtr->totalSectors;
- return (0);
-
- /* fail a disk & optionally start reconstruction */
- case RAIDFRAME_FAIL_DISK:
-
- if (raidPtr->Layout.map->faultsTolerated == 0) {
- /* Can't do this on a RAID 0!! */
- return(EINVAL);
- }
-
- rr = (struct rf_recon_req *) data;
-
- if (rr->row < 0 || rr->row >= raidPtr->numRow
- || rr->col < 0 || rr->col >= raidPtr->numCol)
- return (EINVAL);
-
- rf_printf(0, "%s%d: Failing the disk: row: %d col: %d\n",
- dp->d_name, dp->d_unit, rr->row, rr->col);
-
- /* make a copy of the recon request so that we don't rely on
- * the user's buffer */
- RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
- if (rrcopy == NULL)
- return(ENOMEM);
- bcopy(rr, rrcopy, sizeof(*rr));
- rrcopy->raidPtr = (void *) raidPtr;
-
- retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
- rf_ReconThread,
- rrcopy,"raid_recon");
- return (0);
-
- /* invoke a copyback operation after recon on whatever disk
- * needs it, if any */
- case RAIDFRAME_COPYBACK:
-
- if (raidPtr->Layout.map->faultsTolerated == 0) {
- /* This makes no sense on a RAID 0!! */
- return(EINVAL);
- }
-
- if (raidPtr->copyback_in_progress == 1) {
- /* Copyback is already in progress! */
- return(EINVAL);
- }
-
- retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
- rf_CopybackThread,
- raidPtr,"raid_copyback");
- return (retcode);
-
- /* return the percentage completion of reconstruction */
- case RAIDFRAME_CHECK_RECON_STATUS:
- if (raidPtr->Layout.map->faultsTolerated == 0) {
- /* This makes no sense on a RAID 0, so tell the
- user it's done. */
- *(int *) data = 100;
- return(0);
- }
- row = 0; /* XXX we only consider a single row... */
- if (raidPtr->status[row] != rf_rs_reconstructing)
- *(int *) data = 100;
- else
- *(int *) data = raidPtr->reconControl[row]->percentComplete;
- return (0);
- case RAIDFRAME_CHECK_RECON_STATUS_EXT:
- row = 0; /* XXX we only consider a single row... */
- if (raidPtr->status[row] != rf_rs_reconstructing) {
- progressInfo.remaining = 0;
- progressInfo.completed = 100;
- progressInfo.total = 100;
- } else {
- progressInfo.total =
- raidPtr->reconControl[row]->numRUsTotal;
- progressInfo.completed =
- raidPtr->reconControl[row]->numRUsComplete;
- progressInfo.remaining = progressInfo.total -
- progressInfo.completed;
- }
- bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t));
- return (retcode);
-
- case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
- if (raidPtr->Layout.map->faultsTolerated == 0) {
- /* This makes no sense on a RAID 0, so tell the
- user it's done. */
- *(int *) data = 100;
- return(0);
- }
- if (raidPtr->parity_rewrite_in_progress == 1) {
- *(int *) data = 100 *
- raidPtr->parity_rewrite_stripes_done /
- raidPtr->Layout.numStripe;
- } else {
- *(int *) data = 100;
- }
- return (0);
-
- case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
- if (raidPtr->parity_rewrite_in_progress == 1) {
- progressInfo.total = raidPtr->Layout.numStripe;
- progressInfo.completed =
- raidPtr->parity_rewrite_stripes_done;
- progressInfo.remaining = progressInfo.total -
- progressInfo.completed;
- } else {
- progressInfo.remaining = 0;
- progressInfo.completed = 100;
- progressInfo.total = 100;
- }
- bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t));
- return (retcode);
-
- case RAIDFRAME_CHECK_COPYBACK_STATUS:
- if (raidPtr->Layout.map->faultsTolerated == 0) {
- /* This makes no sense on a RAID 0 */
- *(int *) data = 100;
- return(0);
- }
- if (raidPtr->copyback_in_progress == 1) {
- *(int *) data = 100 * raidPtr->copyback_stripes_done /
- raidPtr->Layout.numStripe;
- } else {
- *(int *) data = 100;
- }
- return (0);
-
- case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
- if (raidPtr->copyback_in_progress == 1) {
- progressInfo.total = raidPtr->Layout.numStripe;
- progressInfo.completed =
- raidPtr->copyback_stripes_done;
- progressInfo.remaining = progressInfo.total -
- progressInfo.completed;
- } else {
- progressInfo.remaining = 0;
- progressInfo.completed = 100;
- progressInfo.total = 100;
- }
- bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t));
- return (retcode);
-
- /* the sparetable daemon calls this to wait for the kernel to
- * need a spare table. this ioctl does not return until a
- * spare table is needed. XXX -- calling mpsleep here in the
- * ioctl code is almost certainly wrong and evil. -- XXX XXX
- * -- I should either compute the spare table in the kernel,
- * or have a different -- XXX XXX -- interface (a different
- * character device) for delivering the table -- XXX */
-#if 0
- case RAIDFRAME_SPARET_WAIT:
- RF_LOCK_MUTEX(rf_sparet_wait_mutex);
- while (!rf_sparet_wait_queue)
- mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
- waitreq = rf_sparet_wait_queue;
- rf_sparet_wait_queue = rf_sparet_wait_queue->next;
- RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
-
- /* structure assignment */
- *((RF_SparetWait_t *) data) = *waitreq;
-
- RF_Free(waitreq, sizeof(*waitreq));
- return (0);
-
- /* wakes up a process waiting on SPARET_WAIT and puts an error
- * code in it that will cause the dameon to exit */
- case RAIDFRAME_ABORT_SPARET_WAIT:
- RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
- waitreq->fcol = -1;
- RF_LOCK_MUTEX(rf_sparet_wait_mutex);
- waitreq->next = rf_sparet_wait_queue;
- rf_sparet_wait_queue = waitreq;
- RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
- wakeup(&rf_sparet_wait_queue);
- return (0);
-
- /* used by the spare table daemon to deliver a spare table
- * into the kernel */
- case RAIDFRAME_SEND_SPARET:
-
- /* install the spare table */
- retcode = rf_SetSpareTable(raidPtr, *(void **) data);
-
- /* respond to the requestor. the return status of the spare
- * table installation is passed in the "fcol" field */
- RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
- waitreq->fcol = retcode;
- RF_LOCK_MUTEX(rf_sparet_wait_mutex);
- waitreq->next = rf_sparet_resp_queue;
- rf_sparet_resp_queue = waitreq;
- wakeup(&rf_sparet_resp_queue);
- RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
-
- return (retcode);
-#endif
-
- default:
- retcode = ENOIOCTL;
- break; /* fall through to the os-specific code below */
-
- }
-
- return (retcode);
-
-}
-
-
-/* raidinit -- complete the rest of the initialization for the
- RAIDframe device.
-
- Allocates a raid_softc for raidPtr, initializes its bio queue and its
- "raidpl" zone of struct raidbuf, then allocates, fills in and creates
- the "raid" disk whose unit number is raidPtr->raidid. The softc is
- linked into raidPtr->sc and returned; NULL is returned if the softc
- itself cannot be allocated. */
-
-
-static struct raid_softc *
-raidinit(raidPtr)
- RF_Raid_t *raidPtr;
-{
- struct raid_softc *sc;
-
- RF_Malloc(sc, sizeof(struct raid_softc), (struct raid_softc *));
- if (sc == NULL) {
- rf_printf(1, "No memory for raid device\n");
- return(NULL);
- }
-
- sc->raidPtr = raidPtr;
-
- /* XXX Should check return code here */
- /* NOTE(review): the results of uma_zcreate() below and of
- * disk_alloc() further down are stored without being checked. */
- bioq_init(&sc->bio_queue);
- sc->sc_cbufpool = uma_zcreate("raidpl", sizeof(struct raidbuf), NULL,
- NULL, NULL, NULL, 0, 0);
-
- /* XXX There may be a weird interaction here between this, and
- * protectedSectors, as used in RAIDframe. */
-
- sc->sc_size = raidPtr->totalSectors;
-
- /* Create the disk device */
- sc->sc_disk = disk_alloc();
- sc->sc_disk->d_open = raidopen;
- sc->sc_disk->d_close = raidclose;
- sc->sc_disk->d_ioctl = raidioctl;
- sc->sc_disk->d_strategy = raidstrategy;
- sc->sc_disk->d_drv1 = sc;
- sc->sc_disk->d_maxsize = DFLTPHYS;
- sc->sc_disk->d_name = "raid";
- sc->sc_disk->d_unit = raidPtr->raidid;
- sc->sc_disk->d_flags = DISKFLAG_NEEDSGIANT;
- /* All fields are filled in before the disk is created. */
- disk_create(sc->sc_disk, DISK_VERSION);
- raidPtr->sc = sc;
-
- return (sc);
-}
-
-/* wake up the daemon & tell it to get us a spare table
- * XXX
- * the entries in the queues should be tagged with the raidPtr
- * so that in the extremely rare case that two recons happen at once,
- * we know for which device we're requesting a spare table
- * XXX
- *
- * XXX This code is not currently used. GO
- *
- * Pushes req onto rf_sparet_wait_queue, wakes any sleeper on that queue,
- * then sleeps until rf_sparet_resp_queue is non-empty. The head of the
- * response queue is removed and freed, and its fcol field is returned
- * as the status code.
- */
-int
-rf_GetSpareTableFromDaemon(req)
- RF_SparetWait_t *req;
-{
- int retcode;
-
- RF_LOCK_MUTEX(rf_sparet_wait_mutex);
- req->next = rf_sparet_wait_queue;
- rf_sparet_wait_queue = req;
- wakeup(&rf_sparet_wait_queue);
-
- /* The original comment here read "mpsleep unlocks the mutex".
- * NOTE(review): the call below is tsleep(), which is not handed
- * rf_sparet_wait_mutex, so the mutex presumably stays held while
- * sleeping -- confirm before re-enabling this path. */
- while (!rf_sparet_resp_queue) {
- tsleep(&rf_sparet_resp_queue, PRIBIO,
- "raidframe getsparetable", 0);
- }
- /* From here on req refers to the response entry, not the caller's
- * request. */
- req = rf_sparet_resp_queue;
- rf_sparet_resp_queue = req->next;
- RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
-
- retcode = req->fcol;
- RF_Free(req, sizeof(*req)); /* this is not the same req as we
- * alloc'd */
- return (retcode);
-}
-
-/* a wrapper around rf_DoAccess that extracts appropriate info from the
- * bp & passes it down.
- * any calls originating in the kernel must use non-blocking I/O
- * do some extra sanity checking to return "appropriate" error values for
- * certain conditions (to make some standard utilities work)
- *
- * Formerly known as: rf_DoAccessKernel
- *
- * Drains sc->bio_queue while raidPtr->openings is positive. Each bio is
- * range-checked against raidPtr->totalSectors and then handed to
- * rf_DoAccess() as an asynchronous, non-blocking access. Bios that fail
- * the checks are completed immediately with BIO_ERROR set.
- */
-void
-raidstart(raidPtr)
- RF_Raid_t *raidPtr;
-{
- RF_SectorCount_t num_blocks, pb, sum;
- RF_RaidAddr_t raid_addr;
- struct raid_softc *sc;
- struct bio *bp;
- daddr_t blocknum;
- int unit, retcode, do_async;
-
- unit = raidPtr->raidid;
- sc = raidPtr->sc;
-
- /* quick check to see if anything has died recently */
- /* At most one pending failure is consumed per call; the component
- * labels are rewritten with the mutex released. */
- RF_LOCK_MUTEX(raidPtr->mutex);
- if (raidPtr->numNewFailures > 0) {
- raidPtr->numNewFailures--;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- rf_update_component_labels(raidPtr,
- RF_NORMAL_COMPONENT_UPDATE);
- } else
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- /* Check to see if we're at the limit... */
- /* Locking pattern: the mutex is held whenever the loop condition is
- * evaluated, dropped at the top of the body, and re-taken before
- * every `continue' and at the bottom of the body. */
- RF_LOCK_MUTEX(raidPtr->mutex);
- while (raidPtr->openings > 0) {
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- /* get the next item, if any, from the queue */
- if ((bp = bioq_first(&sc->bio_queue)) == NULL) {
- /* nothing more to do */
- /* (the mutex was already released above) */
- return;
- }
- bioq_remove(&sc->bio_queue, bp);
-
- /* Ok, for the bp we have here, bp->b_blkno is relative to the
- * partition.. Need to make it absolute to the underlying
- * device.. */
-
- blocknum = bp->bio_pblkno =
- bp->bio_offset >> raidPtr->logBytesPerSector;
-
- rf_printf(3, "Blocks: %ld, %ld\n", (long)bp->bio_pblkno, (long)blocknum);
-
- rf_printf(3, "bp->bio_bcount = %d\n", (int) bp->bio_bcount);
- rf_printf(3, "bp->bio_resid = %d\n", (int) bp->bio_resid);
-
- /* *THIS* is where we adjust what block we're going to...
- * but DO NOT TOUCH bp->bio_pblkno!!! */
- raid_addr = blocknum;
-
- /* num_blocks counts whole sectors; pb is 1 when the byte count has
- * bits set under sectorMask (presumably a trailing partial sector
- * -- confirm sectorMask is bytesPerSector - 1). */
- num_blocks = bp->bio_bcount >> raidPtr->logBytesPerSector;
- pb = (bp->bio_bcount & raidPtr->sectorMask) ? 1 : 0;
- sum = raid_addr + num_blocks + pb;
- if (rf_debugKernelAccess) {
- rf_printf(0, "raid_addr=0x%x sum=%d num_blocks=%d(+%d) "
- "(%d)\n", (int)raid_addr, (int)sum,
- (int)num_blocks, (int)pb,
- (int)bp->bio_resid);
- }
- /* Reject accesses that end past the array; the three `sum < x'
- * comparisons catch wrap-around in the addition above. */
- if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
- || (sum < num_blocks) || (sum < pb)) {
- bp->bio_error = ENOSPC;
- bp->bio_flags |= BIO_ERROR;
- bp->bio_resid = bp->bio_bcount;
- biodone(bp);
- RF_LOCK_MUTEX(raidPtr->mutex);
- continue;
- }
- /*
- * XXX rf_DoAccess() should do this, not just DoAccessKernel()
- */
-
- /* Same test that produced pb above: such requests fail with
- * EINVAL. */
- if (bp->bio_bcount & raidPtr->sectorMask) {
- bp->bio_error = EINVAL;
- bp->bio_flags |= BIO_ERROR;
- bp->bio_resid = bp->bio_bcount;
- biodone(bp);
- RF_LOCK_MUTEX(raidPtr->mutex);
- continue;
-
- }
- rf_printf(3, "Calling DoAccess..\n");
-
-
- /* Claim one opening for this access. */
- RF_LOCK_MUTEX(raidPtr->mutex);
- raidPtr->openings--;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- /*
- * Everything is async.
- */
- do_async = 1;
-
- /* XXX we're still at splbio() here... do we *really*
- need to be? */
-
- /* don't ever condition on bp->bio_cmd & BIO_WRITE.
- * always condition on BIO_READ instead */
-
- /* NOTE(review): retcode is assigned but never examined. */
- retcode = rf_DoAccess(raidPtr, (bp->bio_cmd & BIO_READ) ?
- RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
- do_async, raid_addr, num_blocks,
- bp->bio_data, bp, NULL, NULL,
- RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
-
-
- RF_LOCK_MUTEX(raidPtr->mutex);
- }
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-}
-
-
-
-
-/* invoke an I/O from kernel mode. Disk queue should be locked upon entry
- *
- * Wraps req in a struct raidbuf taken from the softc and either completes
- * it immediately (RF_IO_TYPE_NOP) or initializes the embedded bio with
- * InitBP() and passes it to the component device's d_strategy routine
- * (RF_IO_TYPE_READ / RF_IO_TYPE_WRITE). Any other request type panics.
- * Always returns 0.
- */
-
-int
-rf_DispatchKernelIO(queue, req)
- RF_DiskQueue_t *queue;
- RF_DiskQueueData_t *req;
-{
- int op = (req->type == RF_IO_TYPE_READ) ? BIO_READ : BIO_WRITE;
- struct bio *bp;
- struct raidbuf *raidbp = NULL;
- struct raid_softc *sc;
-
- /* XXX along with the vnode, we also need the softc associated with
- * this device.. */
-
- req->queue = queue;
-
- sc = queue->raidPtr->sc;
-
- rf_printf(3, "DispatchKernelIO %s\n", sc->sc_disk->d_name);
-
- bp = req->bp;
-#if 1
- /* XXX when there is a physical disk failure, someone is passing us a
- * buffer that contains old stuff!! Attempt to deal with this problem
- * without taking a performance hit... (not sure where the real bug
- * is. It's buried in RAIDframe somewhere) :-( GO ) */
-
- if (bp->bio_flags & BIO_ERROR) {
- bp->bio_flags &= ~BIO_ERROR;
- }
- if (bp->bio_error != 0) {
- bp->bio_error = 0;
- }
-#endif
- /* NOTE(review): the result of RAIDGETBUF() is dereferenced without a
- * NULL check -- confirm that it cannot fail. */
- raidbp = RAIDGETBUF(sc);
-
- raidbp->rf_flags = 0; /* XXX not really used anywhere... */
-
- /*
- * context for raidiodone
- */
- raidbp->rf_obp = bp;
- raidbp->req = req;
-
-#if 0 /* XXX */
- LIST_INIT(&raidbp->rf_buf.b_dep);
-#endif
-
- switch (req->type) {
- case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
- /* XXX need to do something extra here.. */
- /* I'm leaving this in, as I've never actually seen it used,
- * and I'd like folks to report it... GO */
- rf_printf(2, "WAKEUP CALLED\n");
- queue->numOutstanding++;
-
- /* XXX need to glue the original buffer into this? */
-
- /* No device I/O is issued: the completion callback is invoked
- * directly. */
- KernelWakeupFunc(&raidbp->rf_buf);
- break;
-
- case RF_IO_TYPE_READ:
- case RF_IO_TYPE_WRITE:
-
- if (req->tracerec) {
- RF_ETIMER_START(req->tracerec->timer);
- }
- /* NOTE(review): the command passed down is op OR'ed with the
- * original bio's bio_cmd, so the two can combine when they
- * differ -- confirm this is intended. */
- InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
- op | bp->bio_cmd, queue->rf_cinfo->ci_dev,
- req->sectorOffset, req->numSector,
- req->buf, KernelWakeupFunc, (void *) req,
- queue->raidPtr->logBytesPerSector, req->b_proc);
-
- if (rf_debugKernelAccess) {
- rf_printf(0, "dispatch: bp->bio_pblkno = %ld\n",
- (long) bp->bio_pblkno);
- }
- queue->numOutstanding++;
- queue->last_deq_sector = req->sectorOffset;
- /* acc wouldn't have been let in if there were any pending
- * reqs at any other priority */
- queue->curPriority = req->priority;
-
- rf_printf(3, "Going for %c to %s%d row %d col %d\n",
- req->type, sc->sc_disk->d_name,
- sc->sc_disk->d_unit, queue->row, queue->col);
- rf_printf(3, "sector %d count %d (%d bytes) %d\n",
- (int) req->sectorOffset, (int) req->numSector,
- (int) (req->numSector <<
- queue->raidPtr->logBytesPerSector),
- (int) queue->raidPtr->logBytesPerSector);
-#if 0 /* XXX */
- if ((raidbp->rf_buf.bio_cmd & BIO_READ) == 0) {
- raidbp->rf_buf.b_vp->v_numoutput++;
- }
-#endif
- /* Hand the bio to the strategy routine of the device it was
- * initialized for. */
- (*devsw(raidbp->rf_buf.bio_dev)->d_strategy)(&raidbp->rf_buf);
-
- break;
-
- default:
- panic("bad req->type in rf_DispatchKernelIO");
- }
- rf_printf(3, "Exiting from DispatchKernelIO\n");
- /* splx(s); */ /* want to test this */
- return (0);
-}
-/* This is the callback function associated with an I/O invoked from
- kernel code.
-
- vbp is the bio embedded in a struct raidbuf. The error state, residual
- and byte count are copied from it onto the original bio (rf_obp), trace
- timing is recorded when req->tracerec is set, and on error an optimal
- component is marked failed. The raidbuf is then returned to the softc
- and the disk queue and the request's CompleteFunc are notified.
- */
-static void
-KernelWakeupFunc(vbp)
- struct bio *vbp;
-{
- RF_DiskQueueData_t *req = NULL;
- RF_DiskQueue_t *queue;
- /* NOTE(review): this cast presumably relies on rf_buf being the
- * first member of struct raidbuf -- confirm against its
- * declaration. */
- struct raidbuf *raidbp = (struct raidbuf *) vbp;
- struct bio *bp;
- struct raid_softc *sc;
- int s;
-
- s = splbio();
- rf_printf(2, "recovering the request queue:\n");
- req = raidbp->req;
-
- bp = raidbp->rf_obp;
- queue = (RF_DiskQueue_t *) req->queue;
- sc = queue->raidPtr->sc;
-
- /* Propagate a component error to the original bio; EIO is used when
- * the component set BIO_ERROR without an error code. */
- if (raidbp->rf_buf.bio_flags & BIO_ERROR) {
- bp->bio_flags |= BIO_ERROR;
- bp->bio_error = raidbp->rf_buf.bio_error ?
- raidbp->rf_buf.bio_error : EIO;
- }
-
- /* XXX methinks this could be wrong... */
-#if 1
- bp->bio_resid = raidbp->rf_buf.bio_resid;
-#endif
-
- if (req->tracerec) {
- RF_ETIMER_STOP(req->tracerec->timer);
- RF_ETIMER_EVAL(req->tracerec->timer);
- RF_LOCK_MUTEX(rf_tracing_mutex);
- req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
- req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
- req->tracerec->num_phys_ios++;
- RF_UNLOCK_MUTEX(rf_tracing_mutex);
- }
- bp->bio_bcount = raidbp->rf_buf.bio_bcount; /* XXXX ? */
-
- /* XXX Ok, let's get aggressive... If BIO_ERROR is set, let's go
- * ballistic, and mark the component as hosed... */
-
- if (bp->bio_flags & BIO_ERROR) {
- /* Mark the disk as dead */
- /* but only mark it once... */
- if (queue->raidPtr->Disks[queue->row][queue->col].status ==
- rf_ds_optimal) {
- rf_printf(0, "%s%d: IO Error. Marking %s as "
- "failed.\n", sc->sc_disk->d_name, sc->sc_disk->d_unit,
- queue->raidPtr->Disks[queue->row][queue->col].devname);
- queue->raidPtr->Disks[queue->row][queue->col].status =
- rf_ds_failed;
- queue->raidPtr->status[queue->row] = rf_rs_degraded;
- queue->raidPtr->numFailures++;
- /* numNewFailures is what raidstart() polls to decide
- * whether to rewrite the component labels. */
- queue->raidPtr->numNewFailures++;
- } else { /* Disk is already dead... */
- /* printf("Disk already marked as dead!\n"); */
- }
-
- }
-
- /* raidbp is not referenced after this point; only bp, queue and req
- * are. */
- RAIDPUTBUF(sc, raidbp);
-
- rf_DiskIOComplete(queue, req, (bp->bio_flags & BIO_ERROR) ? 1 : 0);
- (req->CompleteFunc)(req->argument, (bp->bio_flags & BIO_ERROR) ? 1 : 0);
-
- splx(s);
-}
-
-
-
-/*
- * Fill in a struct bio for an I/O issued from inside the kernel.
- *
- * The transfer covers numSect sectors starting at sector startSect, with
- * both converted to bytes by shifting left by logBytesPerSector. cbFunc
- * becomes the bio's completion routine. b_vp, cbArg and b_proc are
- * accepted for the callers' benefit but are not stored anywhere.
- * Panics if the resulting byte count is zero.
- */
-static void
-InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
-	logBytesPerSector, b_proc)
-	struct bio *bp;
-	struct vnode *b_vp;
-	unsigned rw_flag;
-	dev_t dev;
-	RF_SectorNum_t startSect;
-	RF_SectorCount_t numSect;
-	caddr_t buf;
-	void (*cbFunc) (struct bio *);
-	void *cbArg;
-	int logBytesPerSector;
-	struct proc *b_proc;
-{
-	/* Where the transfer goes and how large it is. */
-	bp->bio_dev = dev;
-	bp->bio_offset = startSect << logBytesPerSector;
-	bp->bio_bcount = numSect << logBytesPerSector;
-	bp->bio_resid = bp->bio_bcount;	/* XXX is this right!?!?!! */
-#if 0 /* XXX */
-	bp->bio_bufsize = bp->bio_bcount;
-#endif
-
-	/* What to do, and with which memory. */
-	bp->bio_cmd = rw_flag;
-	bp->bio_data = buf;
-	bp->bio_error = 0;
-
-	if (bp->bio_bcount == 0) {
-		panic("bp->bio_bcount is zero in InitBP!!\n");
-	}
-/*
-	bp->b_proc = b_proc;
-	bp->b_vp = b_vp;
-*/
-	bp->bio_done = cbFunc;
-}
-
-/*
- * Fabricate default geometry for the RAID disk dp from raidPtr: the
- * sector size, the media size in bytes, and the firmware sectors/heads
- * values. sc is not used. Panics if dp is NULL.
- */
-static void
-raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *sc,
-	struct disk *dp)
-{
-	rf_printf(1, "Building a default label...\n");
-	if (dp == NULL) {
-		panic("raidgetdefaultlabel(): dp is NULL\n");
-	}
-
-	/* fabricate a label... */
-	dp->d_sectorsize = raidPtr->bytesPerSector;
-	dp->d_mediasize = raidPtr->totalSectors * raidPtr->bytesPerSector;
-	dp->d_fwheads = 4 * raidPtr->numCol;
-	dp->d_fwsectors = raidPtr->Layout.dataSectorsPerStripe;
-}
-/*
- * Lookup the provided name in the filesystem. If the file exists,
- * is a valid character device, and isn't being used by anyone else,
- * set *vpp to the file's vnode.
- * You'll find the original of this in ccd.c
- *
- * Returns 0 on success, with *vpp set to the opened (and unlocked)
- * vnode. On failure an errno value is returned, *vpp is left untouched
- * and the vnode, if it had been opened, is closed again:
- *   ENOMEM   - the scratch structures could not be allocated
- *   EBUSY    - the vnode is already in use by someone else
- *   ENOTBLK  - the vnode is not a character device
- *   other    - whatever vn_open() or VOP_GETATTR() reported
- */
-int
-raidlookup(path, td, vpp)
-	char *path;
-	struct thread *td;
-	struct vnode **vpp;	/* result */
-{
-	struct nameidata *nd;
-	struct vnode *vp;
-	struct vattr *va;
-	struct proc *p;
-	int error = 0, flags;
-
-	MALLOC(nd, struct nameidata *, sizeof(struct nameidata), M_TEMP, M_NOWAIT | M_ZERO);
-	MALLOC(va, struct vattr *, sizeof(struct vattr), M_TEMP, M_NOWAIT | M_ZERO);
-	if ((nd == NULL) || (va == NULL)) {
-		printf("Out of memory?\n");
-		/* One of the two may have been allocated; end1 frees it. */
-		error = ENOMEM;
-		goto end1;
-	}
-
-	/* Sanity check the p_fd fields. This is really just a hack */
-	p = td->td_proc;
-	if (!p->p_fd->fd_rdir || !p->p_fd->fd_cdir)
-		printf("Warning: p_fd fields not set\n");
-
-	if (!td->td_proc->p_fd->fd_rdir)
-		p->p_fd->fd_rdir = rootvnode;
-
-	if (!p->p_fd->fd_cdir)
-		p->p_fd->fd_cdir = rootvnode;
-
-	NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, curthread);
-	flags = FREAD | FWRITE;
-	if ((error = vn_open(nd, &flags, 0, -1)) != 0) {
-		rf_printf(2, "RAIDframe: vn_open returned %d\n", error);
-		goto end1;
-	}
-	vp = nd->ni_vp;
-	if (vp->v_usecount > 1) {
-		rf_printf(1, "raidlookup() vp->v_usecount= %d\n", vp->v_usecount);
-		error = EBUSY;
-		goto bad;
-	}
-	if ((error = VOP_GETATTR(vp, va, td->td_ucred, td)) != 0) {
-		rf_printf(1, "raidlookup() VOP_GETATTR returned %d\n", error);
-		goto bad;
-	}
-	/* XXX: eventually we should handle VREG, too. */
-	if (va->va_type != VCHR) {
-		rf_printf(1, "Returning ENOTBLK\n");
-		error = ENOTBLK;
-		goto bad;
-	}
-
-	/* Success: hand the open, unlocked vnode to the caller. */
-	*vpp = vp;
-	VOP_UNLOCK(vp, 0, td);
-	NDFREE(nd, NDF_ONLY_PNBUF);
-	goto end1;
-
-bad:
-	/* The vnode was opened above but is being rejected: undo the open
-	 * so that it is not left referenced with nobody to close it. */
-	VOP_UNLOCK(vp, 0, td);
-	NDFREE(nd, NDF_ONLY_PNBUF);
-	(void) vn_close(vp, FREAD | FWRITE, td->td_ucred, td);
-end1:
-	if (nd != NULL)
-		FREE(nd, M_TEMP);
-	if (va != NULL)
-		FREE(va, M_TEMP);
-	return (error);
-}
-/*
- * Take the softc's exclusive RAIDF_LOCKED flag, sleeping interruptibly
- * (PCATCH) while somebody else holds it.
- *
- * Returns 0 once the flag has been taken, or the non-zero tsleep()
- * result if the sleep ends with an error, in which case the flag is not
- * taken.
- *
- * XXX
- * Several drivers do this; it should be abstracted and made MP-safe.
- * (Hmm... where have we seen this warning before :-> GO )
- */
-static int
-raidlock(struct raid_softc *sc)
-{
-	int rv;
-
-	for (;;) {
-		if ((sc->sc_flags & RAIDF_LOCKED) == 0)
-			break;
-
-		/* Tell the holder that somebody is waiting, then sleep. */
-		sc->sc_flags |= RAIDF_WANTED;
-		rv = tsleep(sc, PRIBIO | PCATCH, "raidlck", 0);
-		if (rv != 0)
-			return (rv);
-	}
-
-	sc->sc_flags |= RAIDF_LOCKED;
-	return (0);
-}
-/*
- * Drop the softc's RAIDF_LOCKED flag. If a waiter has announced itself
- * through RAIDF_WANTED, clear that flag too and wake everybody sleeping
- * on the softc.
- */
-static void
-raidunlock(struct raid_softc *sc)
-{
-	sc->sc_flags &= ~RAIDF_LOCKED;
-
-	if ((sc->sc_flags & RAIDF_WANTED) == 0)
-		return;		/* nobody is waiting */
-
-	sc->sc_flags &= ~RAIDF_WANTED;
-	wakeup(sc);
-}
-
-
-#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
-#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
-
-/*
- * Mark the component behind dev/b_vp as clean: read its component
- * label, set mod_counter and the RF_RAID_CLEAN flag, and write it back.
- *
- * Returns 0 on success, ENOMEM if the scratch label cannot be allocated,
- * or the error from reading or writing the label. The label is written
- * back only if it was read successfully, so a failed read never causes
- * a zero-filled label to be stored on the component.
- */
-int
-raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
-{
-	RF_ComponentLabel_t *clabel;
-	int error;
-
-	MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
-	    M_RAIDFRAME, M_NOWAIT | M_ZERO);
-	if (clabel == NULL) {
-		printf("raidmarkclean: Out of memory?\n");
-		return (ENOMEM);
-	}
-
-	error = raidread_component_label(dev, b_vp, clabel);
-	if (error == 0) {
-		clabel->mod_counter = mod_counter;
-		clabel->clean = RF_RAID_CLEAN;
-		error = raidwrite_component_label(dev, b_vp, clabel);
-	}
-	FREE(clabel, M_RAIDFRAME);
-	return (error);
-}
-
-
-/*
- * Mark the component behind dev/b_vp as dirty: read its component
- * label, set mod_counter and the RF_RAID_DIRTY flag, and write it back.
- *
- * Returns 0 on success, ENOMEM if the scratch label cannot be allocated,
- * or the error from reading or writing the label. The label is written
- * back only if it was read successfully, so a failed read never causes
- * a zero-filled label to be stored on the component.
- */
-int
-raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
-{
-	RF_ComponentLabel_t *clabel;
-	int error;
-
-	MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
-	    M_RAIDFRAME, M_NOWAIT | M_ZERO);
-	if (clabel == NULL) {
-		/* The message used to name raidmarkclean by mistake. */
-		printf("raidmarkdirty: Out of memory?\n");
-		return (ENOMEM);
-	}
-
-	error = raidread_component_label(dev, b_vp, clabel);
-	if (error == 0) {
-		clabel->mod_counter = mod_counter;
-		clabel->clean = RF_RAID_DIRTY;
-		error = raidwrite_component_label(dev, b_vp, clabel);
-	}
-	FREE(clabel, M_RAIDFRAME);
-	return (error);
-}
-
-/*
- * Read the RAIDframe component label of the device dev into *clabel.
- *
- * RF_COMPONENT_INFO_SIZE bytes are read from byte offset
- * RF_COMPONENT_INFO_OFFSET of the device through a scratch buffer, and
- * the leading sizeof(RF_ComponentLabel_t) bytes are copied out when the
- * read succeeds. b_vp is only tested: a NULL vnode means the component
- * is not valid, and EINVAL is returned without touching the device.
- * Otherwise the bufwait() result is returned (0 on success).
- */
-/* ARGSUSED */
-int
-raidread_component_label(dev_t dev, struct vnode *b_vp,
-	RF_ComponentLabel_t *clabel)
-{
-	struct buf *bp;
-	int error;
-
-	/* XXX should probably ensure that we don't try to do this if
-	   someone has changed rf_protected_sectors. */
-
-	/* For whatever reason, this component is not valid.
-	   Don't try to read a component label from it. */
-	if (b_vp == NULL)
-		return (EINVAL);
-
-	/* get a block of the appropriate size... */
-	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
-
-	/* describe the read */
-	bp->b_dev = dev;
-	bp->b_iocmd = BIO_READ;
-	bp->b_iooffset = RF_COMPONENT_INFO_OFFSET;
-	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
-	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
-	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
-
-	/* issue it and wait for completion */
-	DEV_STRATEGY(bp);
-	error = bufwait(bp);
-	if (error == 0)
-		memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t));
-
-	/* the scratch buffer holds nothing worth caching */
-	bp->b_flags |= B_INVAL | B_AGE;
-	brelse(bp);
-	return (error);
-}
-/*
- * Write *clabel as the RAIDframe component label of the device dev.
- *
- * A scratch buffer of RF_COMPONENT_INFO_SIZE bytes is zeroed, the label
- * is copied to its start, and the buffer is written at byte offset
- * RF_COMPONENT_INFO_OFFSET of the device. b_vp is not used.
- * Returns the bufwait() result (0 on success). On failure the error is
- * logged before the buffer is released, so bp->b_error is never read
- * from a buffer that has already been handed back with brelse().
- */
-/* ARGSUSED */
-int
-raidwrite_component_label(dev, b_vp, clabel)
-	dev_t dev;
-	struct vnode *b_vp;
-	RF_ComponentLabel_t *clabel;
-{
-	struct buf *bp;
-	int error;
-
-	/* get a block of the appropriate size... */
-	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
-	bp->b_dev = dev;
-
-	/* get our ducks in a row for the write */
-	bp->b_flags = 0;
-	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
-	bp->b_iooffset = RF_COMPONENT_INFO_OFFSET;
-	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
-	bp->b_iocmd = BIO_WRITE;
-	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
-
-	/* pad the on-disk label area with zeroes beyond the label itself */
-	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
-
-	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
-
-	DEV_STRATEGY(bp);
-	error = bufwait(bp);
-
-	/* Report the failure while bp still belongs to us. */
-	if (error) {
-		rf_printf(0, "Failed to write RAID component info!\n");
-		rf_printf(0, "b_error= %d\n", bp->b_error);
-	}
-
-	bp->b_flags |= B_INVAL | B_AGE;
-	brelse(bp);
-
-	return(error);
-}
-
-/*
- * Bump raidPtr->mod_counter and mark every component that is not dead as
- * dirty. For each such component the label is read first; components
- * whose label status is rf_ds_spared are left alone, all others go
- * through raidmarkdirty() with the new mod_counter. Spares are not
- * handled: the code for them is compiled out.
- * Returns silently if the scratch label cannot be allocated.
- */
-void
-rf_markalldirty(raidPtr)
- RF_Raid_t *raidPtr;
-{
- RF_ComponentLabel_t *clabel;
- int r,c;
-
- MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
- M_RAIDFRAME, M_NOWAIT | M_ZERO);
-
- if (clabel == NULL) {
- printf("rf_markalldirty: Out of memory?\n");
- return;
- }
-
- raidPtr->mod_counter++;
- for (r = 0; r < raidPtr->numRow; r++) {
- for (c = 0; c < raidPtr->numCol; c++) {
- /* we don't want to touch (at all) a disk that has
- failed */
- if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
- /* NOTE(review): the read result is ignored, so after
- * a failed read clabel->status still holds whatever
- * the previous component left there. */
- raidread_component_label(
- raidPtr->Disks[r][c].dev,
- raidPtr->raid_cinfo[r][c].ci_vp,
- clabel);
- if (clabel->status == rf_ds_spared) {
- /* XXX do something special...
- but whatever you do, don't
- try to access it!! */
- } else {
-#if 0
- clabel->status =
- raidPtr->Disks[r][c].status;
- raidwrite_component_label(
- raidPtr->Disks[r][c].dev,
- raidPtr->raid_cinfo[r][c].ci_vp,
- clabel);
-#endif
- raidmarkdirty(
- raidPtr->Disks[r][c].dev,
- raidPtr->raid_cinfo[r][c].ci_vp,
- raidPtr->mod_counter);
- }
- }
- }
- }
- /* printf("Component labels marked dirty.\n"); */
- /* The disabled block below would not compile as written: it uses
- * sparecol, i, j, srow and scol, which this function does not
- * declare, treats clabel as a structure rather than a pointer, and
- * calls raidmarkclean() with two arguments. */
-#if 0
- for( c = 0; c < raidPtr->numSpare ; c++) {
- sparecol = raidPtr->numCol + c;
- if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
- /*
-
- XXX this is where we get fancy and map this spare
- into it's correct spot in the array.
-
- */
- /*
-
- we claim this disk is "optimal" if it's
- rf_ds_used_spare, as that means it should be
- directly substitutable for the disk it replaced.
- We note that too...
-
- */
-
- for(i=0;i<raidPtr->numRow;i++) {
- for(j=0;j<raidPtr->numCol;j++) {
- if ((raidPtr->Disks[i][j].spareRow ==
- r) &&
- (raidPtr->Disks[i][j].spareCol ==
- sparecol)) {
- srow = r;
- scol = sparecol;
- break;
- }
- }
- }
-
- raidread_component_label(
- raidPtr->Disks[r][sparecol].dev,
- raidPtr->raid_cinfo[r][sparecol].ci_vp,
- &clabel);
- /* make sure status is noted */
- clabel.version = RF_COMPONENT_LABEL_VERSION;
- clabel.mod_counter = raidPtr->mod_counter;
- clabel.serial_number = raidPtr->serial_number;
- clabel.row = srow;
- clabel.column = scol;
- clabel.num_rows = raidPtr->numRow;
- clabel.num_columns = raidPtr->numCol;
- clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
- clabel.status = rf_ds_optimal;
- raidwrite_component_label(
- raidPtr->Disks[r][sparecol].dev,
- raidPtr->raid_cinfo[r][sparecol].ci_vp,
- &clabel);
- raidmarkclean( raidPtr->Disks[r][sparecol].dev,
- raidPtr->raid_cinfo[r][sparecol].ci_vp);
- }
- }
-
-#endif
- FREE(clabel, M_RAIDFRAME);
-}
-
-
-/*
- * Bump raidPtr->mod_counter and rewrite the component labels.
- *
- * Every component whose status is rf_ds_optimal has its label re-read,
- * stamped with rf_ds_optimal and the new mod_counter, and written back.
- * Every spare in row 0 whose status is rf_ds_used_spare gets a freshly
- * initialized label carrying the row/column of the disk it stands in
- * for. When final is RF_FINAL_COMPONENT_UPDATE and parity_good is
- * RF_RAID_CLEAN, each rewritten component is additionally marked clean.
- * Returns silently if the scratch label cannot be allocated.
- */
-void
-rf_update_component_labels(raidPtr, final)
- RF_Raid_t *raidPtr;
- int final;
-{
- RF_ComponentLabel_t *clabel;
- int sparecol;
- int r,c;
- int i,j;
- int srow, scol;
-
- srow = -1;
- scol = -1;
-
- MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
- M_RAIDFRAME, M_NOWAIT | M_ZERO);
- if (clabel == NULL) {
- printf("rf_update_component_labels: Out of memory?\n");
- return;
- }
-
- /* XXX should do extra checks to make sure things really are clean,
- rather than blindly setting the clean bit... */
-
- raidPtr->mod_counter++;
-
- for (r = 0; r < raidPtr->numRow; r++) {
- for (c = 0; c < raidPtr->numCol; c++) {
- if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
- /* NOTE(review): the results of the label read and
- * write below are not checked. */
- raidread_component_label(
- raidPtr->Disks[r][c].dev,
- raidPtr->raid_cinfo[r][c].ci_vp,
- clabel);
- /* make sure status is noted */
- clabel->status = rf_ds_optimal;
- /* bump the counter */
- clabel->mod_counter = raidPtr->mod_counter;
-
- raidwrite_component_label(
- raidPtr->Disks[r][c].dev,
- raidPtr->raid_cinfo[r][c].ci_vp,
- clabel);
- if (final == RF_FINAL_COMPONENT_UPDATE) {
- if (raidPtr->parity_good == RF_RAID_CLEAN) {
- raidmarkclean(
- raidPtr->Disks[r][c].dev,
- raidPtr->raid_cinfo[r][c].ci_vp,
- raidPtr->mod_counter);
- }
- }
- }
- /* else we don't touch it.. */
- }
- }
-
- /* Spares are stored in row 0, in the columns after numCol. */
- for( c = 0; c < raidPtr->numSpare ; c++) {
- sparecol = raidPtr->numCol + c;
- if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
- /*
-
- we claim this disk is "optimal" if it's
- rf_ds_used_spare, as that means it should be
- directly substitutable for the disk it replaced.
- We note that too...
-
- */
-
- /* Find the disk this spare replaced. The break only
- * leaves the inner loop, and srow/scol keep their
- * previous values (initially -1) when nothing matches. */
- for(i=0;i<raidPtr->numRow;i++) {
- for(j=0;j<raidPtr->numCol;j++) {
- if ((raidPtr->Disks[i][j].spareRow ==
- 0) &&
- (raidPtr->Disks[i][j].spareCol ==
- sparecol)) {
- srow = i;
- scol = j;
- break;
- }
- }
- }
-
- /* XXX shouldn't *really* need this... */
- raidread_component_label(
- raidPtr->Disks[0][sparecol].dev,
- raidPtr->raid_cinfo[0][sparecol].ci_vp,
- clabel);
- /* make sure status is noted */
-
- raid_init_component_label(raidPtr, clabel);
-
- clabel->mod_counter = raidPtr->mod_counter;
- clabel->row = srow;
- clabel->column = scol;
- clabel->status = rf_ds_optimal;
-
- raidwrite_component_label(
- raidPtr->Disks[0][sparecol].dev,
- raidPtr->raid_cinfo[0][sparecol].ci_vp,
- clabel);
- if (final == RF_FINAL_COMPONENT_UPDATE) {
- if (raidPtr->parity_good == RF_RAID_CLEAN) {
- raidmarkclean( raidPtr->Disks[0][sparecol].dev,
- raidPtr->raid_cinfo[0][sparecol].ci_vp,
- raidPtr->mod_counter);
- }
- }
- }
- }
- FREE(clabel, M_RAIDFRAME);
- rf_printf(1, "Component labels updated\n");
-}
-
-/*
- * Close the vnode of one component on behalf of the array's engine
- * thread. Auto-configured components (auto_configured == 1) are closed
- * with VOP_CLOSE() followed by vrele(); all others go through
- * vn_close(). A NULL vnode is only logged.
- */
-void
-rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
-{
-	struct thread *td = raidPtr->engine_thread;
-
-	if (vp == NULL) {
-		rf_printf(1, "vnode was NULL\n");
-		return;
-	}
-
-	if (auto_configured != 1) {
-		vn_close(vp, FREAD | FWRITE, td->td_ucred, td);
-		return;
-	}
-
-	VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
-	vrele(vp);
-}
-
-
-/*
- * Close the vnodes of every component and every spare of the array.
- * Each slot's vnode pointer is cleared and its auto_configured flag is
- * reset once the component has been closed. Spares are kept in row 0,
- * in the columns following numCol.
- */
-void
-rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
-{
-	struct thread *td;
-	struct vnode *vp;
-	int row, col, spare, scol;
-	int acd;
-
-	/* Kept from the original; the value is not used below. */
-	td = raidPtr->engine_thread;
-
-	/* We take this opportunity to close the vnodes like we should.. */
-	for (row = 0; row < raidPtr->numRow; row++) {
-		for (col = 0; col < raidPtr->numCol; col++) {
-			rf_printf(1, "Closing vnode for row: %d col: %d\n", row, col);
-			vp = raidPtr->raid_cinfo[row][col].ci_vp;
-			acd = raidPtr->Disks[row][col].auto_configured;
-			rf_close_component(raidPtr, vp, acd);
-			raidPtr->raid_cinfo[row][col].ci_vp = NULL;
-			raidPtr->Disks[row][col].auto_configured = 0;
-		}
-	}
-
-	for (spare = 0; spare < raidPtr->numSpare; spare++) {
-		rf_printf(1, "Closing vnode for spare: %d\n", spare);
-		scol = raidPtr->numCol + spare;
-		vp = raidPtr->raid_cinfo[0][scol].ci_vp;
-		acd = raidPtr->Disks[0][scol].auto_configured;
-		rf_close_component(raidPtr, vp, acd);
-		raidPtr->raid_cinfo[0][scol].ci_vp = NULL;
-		raidPtr->Disks[0][scol].auto_configured = 0;
-	}
-}
-
-
-/*
- * Thread body that fails one disk. Takes Giant, flags reconstruction as
- * in progress, and calls rf_FailDisk() for the row/column named in req,
- * asking for reconstruction when RF_FDFLAGS_RECON is set in req->flags.
- * The request is freed here; the thread then exits.
- */
-void
-rf_ReconThread(struct rf_recon_req *req)
-{
-	RF_Raid_t *raidPtr;
-	int start_recon;
-
-	mtx_lock(&Giant);
-	raidPtr = (RF_Raid_t *) req->raidPtr;
-	raidPtr->recon_in_progress = 1;
-
-	start_recon = (req->flags & RF_FDFLAGS_RECON) ? 1 : 0;
-	rf_FailDisk(raidPtr, req->row, req->col, start_recon);
-
-	/* XXX get rid of this! we don't need it at all.. */
-	RF_Free(req, sizeof(*req));
-
-	raidPtr->recon_in_progress = 0;
-
-	/* That's all... */
-	RF_THREAD_EXIT(0);	/* does not return */
-}
-
-/*
- * Thread body that rewrites the array's parity. Takes Giant, runs
- * rf_RewriteParity() with parity_rewrite_in_progress set, records clean
- * parity on success (or logs the failure), wakes anybody waiting for
- * the rewrite to stop, and exits.
- */
-void
-rf_RewriteParityThread(RF_Raid_t *raidPtr)
-{
-	int rv;
-
-	mtx_lock(&Giant);
-	raidPtr->parity_rewrite_in_progress = 1;
-	rv = rf_RewriteParity(raidPtr);
-	if (rv == 0) {
-		/* set the clean bit! If we shutdown correctly,
-		   the clean bit on each component label will get
-		   set */
-		raidPtr->parity_good = RF_RAID_CLEAN;
-	} else {
-		rf_printf(0, "raid%d: Error re-writing parity!\n",raidPtr->raidid);
-	}
-	raidPtr->parity_rewrite_in_progress = 0;
-
-	/* Anyone waiting for us to stop? If so, inform them... */
-	if (raidPtr->waitShutdown)
-		wakeup(&raidPtr->parity_rewrite_in_progress);
-
-	/* That's all... */
-	RF_THREAD_EXIT(0);	/* does not return */
-}
-
-
-/*
- * Thread body for a copyback operation. Takes Giant, runs
- * rf_CopybackReconstructedData() with copyback_in_progress set for the
- * duration of the call, and exits.
- */
-void
-rf_CopybackThread(RF_Raid_t *raidPtr)
-{
-	mtx_lock(&Giant);
-
-	raidPtr->copyback_in_progress = 1;
-	rf_CopybackReconstructedData(raidPtr);
-	raidPtr->copyback_in_progress = 0;
-
-	/* That's all... */
-	RF_THREAD_EXIT(0);	/* does not return */
-}
-
-
-/*
- * Thread body for an in-place reconstruction of the component at
- * req->row / req->col. Takes Giant, runs rf_ReconstructInPlace() with
- * recon_in_progress set, frees the request and exits. The result of the
- * reconstruction is stored but not acted upon.
- */
-void
-rf_ReconstructInPlaceThread(struct rf_recon_req *req)
-{
-	RF_Raid_t *raidPtr;
-	int retcode;
-
-	mtx_lock(&Giant);
-
-	raidPtr = req->raidPtr;
-	raidPtr->recon_in_progress = 1;
-	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
-
-	/* The request belongs to this thread; release it. */
-	RF_Free(req, sizeof(*req));
-	raidPtr->recon_in_progress = 0;
-
-	/* That's all... */
-	RF_THREAD_EXIT(0);	/* does not return */
-}
-
-/*
- * Build the list of RAID components found on the system's disks.
- *
- * As compiled, the whole scan is disabled ("#if 0 XXX GEOM") and the
- * function always returns an empty (NULL) list. The disabled code
- * enumerates the disks, skips names starting with "fd", "cd" or "acd",
- * reads each disk's slice table and each slice's disklabel, and hands
- * the labels to rf_search_label().
- */
-RF_AutoConfig_t *
-rf_find_raid_components()
-{
- RF_AutoConfig_t *ac_list = NULL;
-#if 0 /* XXX GEOM */
- struct vnode *vp;
- struct disklabel *label;
- struct diskslice *slice;
- struct diskslices *slices;
- struct disk *disk;
- struct thread *td;
- dev_t dev;
- char *devname;
- int error, j;
- int nslices;
-
- td = curthread;
-
- MALLOC(label, struct disklabel *, sizeof(struct disklabel),
- M_RAIDFRAME, M_NOWAIT|M_ZERO);
- MALLOC(slices, struct diskslices *, sizeof(struct diskslices),
- M_RAIDFRAME, M_NOWAIT|M_ZERO);
- /* NOTE(review): if only one of the two allocations fails, the
- * other one is not freed before returning. */
- if ((label == NULL) || (slices == NULL)) {
- printf("rf_find_raid_components: Out of Memory?\n");
- return (NULL);
- }
-
- /* initialize the AutoConfig list */
- ac_list = NULL;
-
- /* we begin by trolling through *all* the disk devices on the system */
-
- disk = NULL;
- while ((disk = disk_enumerate(disk))) {
-
- /* we don't care about floppies... */
- /* (the test below also skips names starting with "cd" and
- * "acd") */
- devname = disk->d_dev->si_name;
- if (!strncmp(devname, "fd", 2) ||
- !strncmp(devname, "cd", 2) ||
- !strncmp(devname, "acd", 3))
- continue;
-
- rf_printf(1, "Examining %s\n", disk->d_dev->si_name);
- if (bdevvp(disk->d_dev, &vp))
- panic("RAIDframe can't alloc vnode");
- vref(vp);
-
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
- VOP_UNLOCK(vp, 0, td);
- if (error) {
- vput(vp);
- continue;
- }
-
- /* NOTE(review): the vnode is opened with FREAD but closed
- * with FREAD | FWRITE, here and in the slice loop below. */
- error = VOP_IOCTL(vp, DIOCGSLICEINFO, (caddr_t)slices,
- FREAD, td->td_ucred, td);
- VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
- vrele(vp);
- if (error) {
- /* No slice table. */
- continue;
- }
-
- nslices = slices->dss_nslices;
- if ((nslices == 0) || (nslices > MAX_SLICES))
- continue;
-
- /* Iterate through the slices */
- for (j = 1; j < nslices; j++) {
-
- rf_printf(1, "Examining slice %d\n", j);
- slice = &slices->dss_slices[j - 1];
- dev = dkmodslice(disk->d_dev, j);
- if (bdevvp(dev, &vp))
- panic("RAIDframe can't alloc vnode");
-
- vref(vp);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
- VOP_UNLOCK(vp, 0, td);
- /* NOTE(review): unlike the whole-disk case above, the
- * reference taken by vref() is not dropped on this
- * error path. */
- if (error) {
- continue;
- }
-
- error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)label,
- FREAD, td->td_ucred, td);
- VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
- vrele(vp);
- if (error)
- continue;
-
- rf_search_label(dev, label, &ac_list);
- }
- }
-
- FREE(label, M_RAIDFRAME);
- FREE(slices, M_RAIDFRAME);
-#endif
- return (ac_list);
-}
-
-/*
- * Look through one disklabel for partitions of type FS_RAID and prepend
- * every one that carries a plausible RAIDframe component label to *ac_list.
- *
- * dev:     device the disklabel was read from
- * label:   the disklabel to scan
- * ac_list: in/out head of the autoconfig component list
- *
- * For accepted components the opened vnode and the component label are
- * handed over to the new list entry; for rejected ones both are released.
- *
- * With the GEOM "#if 0" in place dev1 is always NULL, so every partition
- * is skipped and the list is left untouched.
- */
-static void
-rf_search_label(dev_t dev, struct disklabel *label, RF_AutoConfig_t **ac_list)
-{
-	RF_AutoConfig_t *ac;
-	RF_ComponentLabel_t *clabel;
-	struct vnode *vp;
-	struct thread *td;
-	dev_t dev1;
-	int i, error, good_one;
-
-	td = curthread;
-
-	/* Iterate through the partitions */
-	for (i=0; i < label->d_npartitions; i++) {
-		/* We only support partitions marked as RAID */
-		if (label->d_partitions[i].p_fstype != FS_RAID)
-			continue;
-
-#if 0 /* GEOM */
-		dev1 = dkmodpart(dev, i);
-#else
-		dev1 = NULL;
-#endif
-		if (dev1 == NULL) {
-			rf_printf(1, "dev1 == null\n");
-			continue;
-		}
-		if (bdevvp(dev1, &vp))
-			panic("RAIDframe can't alloc vnode");
-
-		vref(vp);
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
-		error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
-		VOP_UNLOCK(vp, 0, td);
-		if (error) {
-			/*
-			 * Open failed: give back the reference taken by
-			 * vref() above, as the !good_one cleanup does for
-			 * vnodes that were opened.
-			 */
-			vrele(vp);
-			continue;
-		}
-
-		good_one = 0;
-
-		clabel = (RF_ComponentLabel_t *)
-		    malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
-		    M_NOWAIT);
-		if (clabel == NULL) {
-			/* XXX CLEANUP HERE */
-			panic("RAID autoconfig: no memory!\n");
-		}
-
-		/* raidread_component_label() returns 0 on success */
-		if (!raidread_component_label(dev1, vp, clabel)) {
-			/* Got the label.  Is it reasonable? */
-			if (rf_reasonable_label(clabel) &&
-			    (clabel->partitionSize <=
-			     label->d_partitions[i].p_size)) {
-				rf_printf(1, "Component on: %s: %d\n",
-				    dev1->si_name, label->d_partitions[i].p_size);
-				rf_print_component_label(clabel);
-				/* if it's reasonable, add it, else ignore it */
-				ac = (RF_AutoConfig_t *)
-				    malloc(sizeof(RF_AutoConfig_t),
-				    M_RAIDFRAME, M_NOWAIT);
-				if (ac == NULL) {
-					/* XXX should panic? */
-					panic("RAID autoconfig: no memory!\n");
-				}
-
-				/*
-				 * NOTE(review): the name recorded is that of
-				 * dev, while the message above and ac->dev
-				 * use dev1 -- confirm which is intended.
-				 */
-				sprintf(ac->devname, "%s", dev->si_name);
-				ac->dev = dev1;
-				ac->vp = vp;
-				ac->clabel = clabel;
-				ac->next = *ac_list;
-				*ac_list = ac;
-				good_one = 1;
-			}
-		}
-		if (!good_one) {
-			/* cleanup */
-			free(clabel, M_RAIDFRAME);
-			VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
-			vrele(vp);
-		}
-	}
-}
-
-/*
- * Sanity-check a component label read from disk.
- *
- * Returns 1 when every checked field is plausible, 0 otherwise.
- */
-static int
-rf_reasonable_label(clabel)
-	RF_ComponentLabel_t *clabel;
-{
-	/* Only the two known label versions are accepted. */
-	if (clabel->version != RF_COMPONENT_LABEL_VERSION_1 &&
-	    clabel->version != RF_COMPONENT_LABEL_VERSION)
-		return (0);
-
-	/* The clean flag must hold one of its two defined values. */
-	if (clabel->clean != RF_RAID_CLEAN &&
-	    clabel->clean != RF_RAID_DIRTY)
-		return (0);
-
-	/* Position must be non-negative ... */
-	if (clabel->row < 0 || clabel->column < 0)
-		return (0);
-
-	/* ... the geometry non-empty ... */
-	if (clabel->num_rows <= 0 || clabel->num_columns <= 0)
-		return (0);
-
-	/* ... and the position must lie inside that geometry. */
-	if (clabel->row >= clabel->num_rows ||
-	    clabel->column >= clabel->num_columns)
-		return (0);
-
-	/* Sizes must be positive. */
-	if (clabel->blockSize <= 0 || clabel->numBlocks <= 0)
-		return (0);
-
-	/* label looks reasonable enough... */
-	return (1);
-}
-
-
-/*
- * Dump the fields of a component label through rf_printf() at level 1.
- */
-void
-rf_print_component_label(clabel)
-	RF_ComponentLabel_t *clabel;
-{
-	const char *is_clean, *is_auto, *is_root;
-
-	/* Render the three flag fields as text up front. */
-	if (clabel->clean)
-		is_clean = "Yes";
-	else
-		is_clean = "No";
-	if (clabel->autoconfigure)
-		is_auto = "Yes";
-	else
-		is_auto = "No";
-	if (clabel->root_partition)
-		is_root = "Yes";
-	else
-		is_root = "No";
-
-	rf_printf(1, " Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
-	    clabel->row, clabel->column,
-	    clabel->num_rows, clabel->num_columns);
-	rf_printf(1, " Version: %d Serial Number: %d Mod Counter: %d\n",
-	    clabel->version, clabel->serial_number,
-	    clabel->mod_counter);
-	rf_printf(1, " Clean: %s Status: %d\n", is_clean, clabel->status );
-	rf_printf(1, " sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
-	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
-	/* parityConfig is printed as a single character */
-	rf_printf(1, " RAID Level: %c blocksize: %d numBlocks: %d\n",
-	    (char) clabel->parityConfig, clabel->blockSize,
-	    clabel->numBlocks);
-	rf_printf(1, " Autoconfig: %s\n", is_auto);
-	rf_printf(1, " Contains root partition: %s\n", is_root);
-	rf_printf(1, " Last configured as: raid%d\n", clabel->last_unit );
-#if 0
-	rf_printf(1, " Config order: %d\n", clabel->config_order);
-#endif
-}
-
-/*
- * Partition the autoconfig component list into configuration sets.
- *
- * Each component is pushed onto the first existing set that
- * rf_does_it_fit() accepts it for; otherwise a new set is created and put
- * at the head of the set list.  The components' next pointers are rewritten
- * to form the per-set lists, so ac_list must not be walked afterwards.
- *
- * Returns the head of the set list (NULL for an empty input).  Panics when
- * a set cannot be allocated.
- */
-RF_ConfigSet_t *
-rf_create_auto_sets(ac_list)
-	RF_AutoConfig_t *ac_list;
-{
-	RF_ConfigSet_t *sets = NULL;
-	RF_ConfigSet_t *match;
-	RF_AutoConfig_t *cur, *following;
-
-	for (cur = ac_list; cur != NULL; cur = following) {
-		/* cur->next gets overwritten below; remember the successor */
-		following = cur->next;
-
-		/* which set does this component fit into? */
-		for (match = sets; match != NULL; match = match->next) {
-			if (rf_does_it_fit(match, cur))
-				break;
-		}
-
-		if (match != NULL) {
-			/* looks like it matches: push onto that set */
-			cur->next = match->ac;
-			match->ac = cur;
-			continue;
-		}
-
-		/* no set yet, or none that fits: start a new one */
-		match = (RF_ConfigSet_t *)
-		    malloc(sizeof(RF_ConfigSet_t), M_RAIDFRAME, M_NOWAIT);
-		if (match == NULL) {
-			panic("rf_create_auto_sets: No memory!\n");
-		}
-		match->ac = cur;
-		cur->next = NULL;
-		match->next = sets;
-		match->rootable = 0;
-		sets = match;
-	}
-
-	return (sets);
-}
-
-/*
- * Decide whether component ac belongs to configuration set cset.
- *
- * Only the first member of the set is compared against; the other members
- * got into the set by passing this same test.
- *
- * Returns 1 when the labels agree on every compared field, 0 otherwise.
- *
- * Neither partitionSize nor mod_counter is compared.  The mod_counter is
- * left out on purpose: failed disks carry lower modification counts, and
- * if they were rejected here they would form a set of their own.  Two sets
- * could then compete for the same unit (for example raid0), or for the
- * root filesystem, with weird behaviour or serious data loss as the
- * result.  Such components are therefore taken into the set here and
- * weeded out later in the autoconfiguration process.
- */
-static int
-rf_does_it_fit(cset, ac)
-	RF_ConfigSet_t *cset;
-	RF_AutoConfig_t *ac;
-{
-	RF_ComponentLabel_t *have, *cand;
-
-	have = cset->ac->clabel;
-	cand = ac->clabel;
-
-	/* identity of the array */
-	if (have->version != cand->version ||
-	    have->serial_number != cand->serial_number)
-		return (0);
-
-	/* geometry and layout */
-	if (have->num_rows != cand->num_rows ||
-	    have->num_columns != cand->num_columns ||
-	    have->sectPerSU != cand->sectPerSU ||
-	    have->SUsPerPU != cand->SUsPerPU ||
-	    have->SUsPerRU != cand->SUsPerRU ||
-	    have->parityConfig != cand->parityConfig ||
-	    have->maxOutstanding != cand->maxOutstanding ||
-	    have->blockSize != cand->blockSize ||
-	    have->numBlocks != cand->numBlocks)
-		return (0);
-
-	/* autoconfiguration settings */
-	if (have->autoconfigure != cand->autoconfigure ||
-	    have->root_partition != cand->root_partition ||
-	    have->last_unit != cand->last_unit ||
-	    have->config_order != cand->config_order)
-		return (0);
-
-	/* if it gets here, it almost *has* to be a match */
-	return (1);
-}
-
-/*
- * Decide whether a configuration set has enough up-to-date components to
- * be configured.
- *
- * The reference mod_counter is the highest one found in the set; only
- * components carrying exactly that value count as present.  Every
- * (row, column) position is then searched for such a component.
- *
- * For parityConfig '1' the columns are treated as (even, odd) pairs and the
- * set is rejected as soon as both members of a pair are missing.  For '0'
- * any missing component is fatal; for '4' and '5' more than one is.  Other
- * parity types are always accepted by the final check.
- *
- * Returns 1 if the set can be configured, 0 if not.
- */
-int
-rf_have_enough_components(cset)
- RF_ConfigSet_t *cset;
-{
- RF_AutoConfig_t *ac;
- RF_AutoConfig_t *auto_config;
- RF_ComponentLabel_t *clabel;
- int r,c;
- int num_rows;
- int num_cols;
- int num_missing;
- int mod_counter;
- int mod_counter_found;
- int even_pair_failed;
- char parity_type;
-
-
- /* check to see that we have enough 'live' components
- of this set. If so, we can configure it if necessary */
-
- num_rows = cset->ac->clabel->num_rows;
- num_cols = cset->ac->clabel->num_columns;
- parity_type = cset->ac->clabel->parityConfig;
-
- /* XXX Check for duplicate components!?!?!? */
-
- /* Determine what the mod_counter is supposed to be for this set. */
- /* (the loop below ends with the largest mod_counter in the set) */
-
- mod_counter_found = 0;
- mod_counter = 0;
- ac = cset->ac;
- while(ac!=NULL) {
- if (mod_counter_found==0) {
- mod_counter = ac->clabel->mod_counter;
- mod_counter_found = 1;
- } else {
- if (ac->clabel->mod_counter > mod_counter) {
- mod_counter = ac->clabel->mod_counter;
- }
- }
- ac = ac->next;
- }
-
- num_missing = 0;
- auto_config = cset->ac;
-
- for(r=0; r<num_rows; r++) {
- even_pair_failed = 0;
- for(c=0; c<num_cols; c++) {
- /* look for an up-to-date component at position (r, c) */
- ac = auto_config;
- while(ac!=NULL) {
- if ((ac->clabel->row == r) &&
- (ac->clabel->column == c) &&
- (ac->clabel->mod_counter == mod_counter)) {
- /* it's this one... */
- rf_printf(1, "Found: %s at %d,%d\n",
- ac->devname,r,c);
- break;
- }
- ac=ac->next;
- }
- if (ac==NULL) {
- /* Didn't find one here! */
- /* special case for RAID 1, especially
- where there are more than 2
- components (where RAIDframe treats
- things a little differently :( ) */
- if (parity_type == '1') {
- if (c%2 == 0) { /* even component */
- even_pair_failed = 1;
- } else { /* odd component. If
- we're failed, and
- so is the even
- component, it's
- "Good Night, Charlie" */
- if (even_pair_failed == 1) {
- return(0);
- }
- }
- } else {
- /* normal accounting */
- num_missing++;
- }
- }
- if ((parity_type == '1') && (c%2 == 1)) {
- /* Just did an odd component, and we didn't
- bail.. reset the even_pair_failed flag,
- and go on to the next component.... */
- even_pair_failed = 0;
- }
- }
- }
-
- clabel = cset->ac->clabel;
-
- if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
- ((clabel->parityConfig == '4') && (num_missing > 1)) ||
- ((clabel->parityConfig == '5') && (num_missing > 1))) {
- /* XXX this needs to be made *much* more general */
- /* Too many failures */
- return(0);
- }
- /* otherwise, all is well, and we've got enough to take a kick
- at autoconfiguring this set */
- return(1);
-}
-
-/*
- * Fill in an RF_Config_t from the component labels of one set.
- *
- * ac:      head of the set's component list; its label supplies the
- *          array-wide parameters
- * config:  configuration structure to fill in
- * raidPtr: not used by this function
- */
-void
-rf_create_configuration(ac,config,raidPtr)
-	RF_AutoConfig_t *ac;
-	RF_Config_t *config;
-	RF_Raid_t *raidPtr;
-{
-	RF_ComponentLabel_t *first_label = ac->clabel;
-	RF_AutoConfig_t *comp;
-	int v;
-
-	/* Array-wide settings come from the first component's label. */
-	config->numRow = first_label->num_rows;
-	config->numCol = first_label->num_columns;
-	config->numSpare = 0; /* XXX should this be set here? */
-	config->sectPerSU = first_label->sectPerSU;
-	config->SUsPerPU = first_label->SUsPerPU;
-	config->SUsPerRU = first_label->SUsPerRU;
-	config->parityConfig = first_label->parityConfig;
-	/* XXX the queue type is hard-wired */
-	strcpy(config->diskQueueType,"fifo");
-	config->maxOutstandingDiskReqs = first_label->maxOutstanding;
-	config->layoutSpecificSize = 0; /* XXX ? */
-
-	/*
-	 * Record each component's device name at its (row, column) slot.
-	 * The indices are expected to be in range because labels pass
-	 * through rf_reasonable_label() before being listed.
-	 */
-	for (comp = ac; comp != NULL; comp = comp->next) {
-		strcpy(config->devnames[comp->clabel->row][comp->clabel->column],
-		    comp->devname);
-	}
-
-	/* No debug variables: make every entry an empty string. */
-	for (v = 0; v < RF_MAXDBGV; v++)
-		config->debugVars[v][0] = '\0';
-}
-
-/*
- * Set the autoconfigure flag of an array and store it in the component
- * label of every component whose status is rf_ds_optimal.
- *
- * Returns new_value.
- */
-int
-rf_set_autoconfig(raidPtr, new_value)
-	RF_Raid_t *raidPtr;
-	int new_value;
-{
-	RF_ComponentLabel_t *clabel;
-	struct vnode *vp;
-	dev_t dev;
-	int row, column;
-
-	/* One scratch label, reused for every component. */
-	MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
-	    M_RAIDFRAME, M_WAITOK | M_ZERO);
-
-	raidPtr->autoconfigure = new_value;
-	for(row=0; row<raidPtr->numRow; row++) {
-		for(column=0; column<raidPtr->numCol; column++) {
-			if (raidPtr->Disks[row][column].status !=
-			    rf_ds_optimal)
-				continue;
-			dev = raidPtr->Disks[row][column].dev;
-			vp = raidPtr->raid_cinfo[row][column].ci_vp;
-			/*
-			 * A non-zero return means the label was not read.
-			 * The scratch buffer then still holds zeroes or the
-			 * previous component's label, so writing it back
-			 * would put a wrong label on this disk.
-			 */
-			if (raidread_component_label(dev, vp, clabel) != 0)
-				continue;
-			clabel->autoconfigure = new_value;
-			raidwrite_component_label(dev, vp, clabel);
-		}
-	}
-	FREE(clabel, M_RAIDFRAME);
-	return(new_value);
-}
-
-/*
- * Set the root_partition flag of an array and store it in the component
- * label of every component whose status is rf_ds_optimal.
- *
- * Returns new_value.
- */
-int
-rf_set_rootpartition(raidPtr, new_value)
-	RF_Raid_t *raidPtr;
-	int new_value;
-{
-	RF_ComponentLabel_t *clabel;
-	struct vnode *vp;
-	dev_t dev;
-	int row, column;
-
-	/* One scratch label, reused for every component. */
-	MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
-	    M_RAIDFRAME, M_WAITOK | M_ZERO);
-
-	raidPtr->root_partition = new_value;
-	for(row=0; row<raidPtr->numRow; row++) {
-		for(column=0; column<raidPtr->numCol; column++) {
-			if (raidPtr->Disks[row][column].status !=
-			    rf_ds_optimal)
-				continue;
-			dev = raidPtr->Disks[row][column].dev;
-			vp = raidPtr->raid_cinfo[row][column].ci_vp;
-			/*
-			 * A non-zero return means the label was not read.
-			 * The scratch buffer then still holds zeroes or the
-			 * previous component's label, so writing it back
-			 * would put a wrong label on this disk.
-			 */
-			if (raidread_component_label(dev, vp, clabel) != 0)
-				continue;
-			clabel->root_partition = new_value;
-			raidwrite_component_label(dev, vp, clabel);
-		}
-	}
-	FREE(clabel, M_RAIDFRAME);
-	return(new_value);
-}
-
-/*
- * Close and release the vnode of every component in a configuration set.
- * Each released entry gets its vp cleared, so a second call does nothing.
- */
-void
-rf_release_all_vps(cset)
-	RF_ConfigSet_t *cset;
-{
-	struct thread *td = curthread;
-	RF_AutoConfig_t *comp;
-
-	for (comp = cset->ac; comp != NULL; comp = comp->next) {
-		if (!comp->vp)
-			continue;
-		/* Close the vp, and give it back */
-		VOP_CLOSE(comp->vp, FREAD, td->td_ucred, td);
-		vrele(comp->vp);
-		comp->vp = NULL;
-	}
-}
-
-
-/*
- * Free a configuration set: every component entry together with its
- * component label, then the set structure itself.  Vnodes are not touched
- * here.
- */
-void
-rf_cleanup_config_set(cset)
-	RF_ConfigSet_t *cset;
-{
-	RF_AutoConfig_t *comp, *following;
-
-	for (comp = cset->ac; comp != NULL; comp = following) {
-		/* fetch the successor before comp is freed */
-		following = comp->next;
-		free(comp->clabel, M_RAIDFRAME);
-		free(comp, M_RAIDFRAME);
-	}
-
-	/* and, finally, the set itself */
-	free(cset, M_RAIDFRAME);
-}
-
-
-/*
- * Initialise a component label from the current state of an array.
- *
- * The label is marked dirty and optimal.  The row and column fields are
- * not set here.
- */
-void
-raid_init_component_label(raidPtr, clabel)
-	RF_Raid_t *raidPtr;
-	RF_ComponentLabel_t *clabel;
-{
-	/* identity: current label version plus the array's counters */
-	clabel->version = RF_COMPONENT_LABEL_VERSION;
-	clabel->serial_number = raidPtr->serial_number;
-	clabel->mod_counter = raidPtr->mod_counter;
-
-	/* state: a freshly initialised label is dirty but healthy */
-	clabel->clean = RF_RAID_DIRTY;
-	clabel->status = rf_ds_optimal;
-
-	/* geometry */
-	clabel->num_rows = raidPtr->numRow;
-	clabel->num_columns = raidPtr->numCol;
-	clabel->blockSize = raidPtr->bytesPerSector;
-	clabel->numBlocks = raidPtr->sectorsPerDisk;
-
-	/* layout */
-	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
-	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
-	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
-	/* XXX not portable */
-	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
-	clabel->maxOutstanding = raidPtr->maxOutstanding;
-
-	/* autoconfiguration settings */
-	clabel->autoconfigure = raidPtr->autoconfigure;
-	clabel->root_partition = raidPtr->root_partition;
-	clabel->last_unit = raidPtr->raidid;
-	clabel->config_order = raidPtr->config_order;
-}
-
-/*
- * Autoconfigure one configuration set.
- *
- * cset:      the set to configure
- * unit:      out; receives the unit number chosen for the set, or stays -1
- *            when no unit or memory was available or the RAID device could
- *            not be created
- * parent_sc: control device softc that holds the unit table
- *
- * Returns 0 on success, 1 on a resource failure, or the error returned by
- * rf_Configure().
- *
- * The temporary RF_Config_t is freed on every path out of the function.
- */
-int
-rf_auto_config_set(cset, unit, parent_sc)
-	RF_ConfigSet_t *cset;
-	int *unit;
-	struct raidctl_softc *parent_sc;
-{
-	int retcode = 0;
-	RF_Raid_t *raidPtr;
-	RF_Config_t *config;
-	int raidID;
-
-	rf_printf(0, "RAIDframe autoconfigure\n");
-
-	*unit = -1;
-
-	/* 1. Create a config structure */
-
-	config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME,
-	    M_NOWAIT|M_ZERO);
-	if (config==NULL) {
-		rf_printf(0, "Out of mem at rf_auto_config_set\n");
-		/* XXX do something more intelligent here. */
-		return(1);
-	}
-
-	/*
-	 * 2. Figure out what RAID ID this one is supposed to live at.
-	 *    Try for the unit it was configured on last time; a negative
-	 *    last_unit falls back to searching from unit 0.
-	 */
-
-	raidID = cset->ac->clabel->last_unit;
-	if (raidID < 0) {
-		/* let's not wander off into lala land. */
-		raidID = raidgetunit(parent_sc, 0);
-	} else {
-		raidID = raidgetunit(parent_sc, raidID);
-	}
-
-	if (raidID < 0) {
-		/* punt... */
-		rf_printf(0, "Unable to auto configure this set!\n");
-		rf_printf(1, "Out of RAID devs!\n");
-		/* config used to be leaked on this path */
-		free(config, M_RAIDFRAME);
-		return(1);
-	}
-	rf_printf(0, "Configuring raid%d:\n",raidID);
-	RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *));
-	if (raidPtr == NULL) {
-		rf_printf(0, "Out of mem at rf_auto_config_set\n");
-		/* config used to be leaked on this path */
-		free(config, M_RAIDFRAME);
-		return (1);
-	}
-	bzero((char *)raidPtr, sizeof(RF_Raid_t));
-
-	/* XXX all this stuff should be done SOMEWHERE ELSE! */
-	raidPtr->raidid = raidID;
-	raidPtr->openings = RAIDOUTSTANDING;
-
-	/* 3. Build the configuration structure */
-	rf_create_configuration(cset->ac, config, raidPtr);
-
-	/* 4. Do the configuration */
-	retcode = rf_Configure(raidPtr, config, cset->ac);
-
-	/*
-	 * NOTE(review): when rf_Configure() fails, raidPtr is not freed
-	 * here and *unit is still set below -- confirm that is intended.
-	 */
-	if (retcode == 0) {
-
-		parent_sc->sc_raiddevs[raidID] = raidinit(raidPtr);
-		if (parent_sc->sc_raiddevs[raidID] == NULL) {
-			rf_printf(0, "Could not create RAID device\n");
-			RF_Free(raidPtr, sizeof(RF_Raid_t));
-			free(config, M_RAIDFRAME);
-			return (1);
-		}
-
-		parent_sc->sc_numraid++;
-		((struct raid_softc *)raidPtr->sc)->sc_parent_dev =
-		    parent_sc->sc_dev;
-		rf_markalldirty(raidPtr);
-		raidPtr->autoconfigure = 1; /* XXX do this here? */
-		if (cset->ac->clabel->root_partition==1) {
-			/* everything configured just fine.  Make a note
-			   that this set is eligible to be root. */
-			cset->rootable = 1;
-			/* XXX do this here? */
-			raidPtr->root_partition = 1;
-		}
-	}
-
-	/* 5. Cleanup */
-	free(config, M_RAIDFRAME);
-
-	*unit = raidID;
-	return(retcode);
-}
-
-/*
- * Placeholder with no effect.  The previous body only loaded the softc and
- * the bio pointer into locals that were never used, so those dead stores
- * have been dropped.
- */
-void
-rf_disk_unbusy(desc)
-	RF_RaidAccessDesc_t *desc;
-{
-	(void)desc;	/* intentionally unused */
-}
-
-/*
- * Get the next available unit number from the unit table.  You can also
- * request a particular unit number by passing it in the second arg.  If it's
- * not available, then grab the next free one, searching upwards from the
- * requested number and then wrapping around to 0.
- *
- * Returns the unit number, or -1 if none are available or if id is outside
- * 0 .. RF_MAX_ARRAYS - 1.
- */
-static int
-raidgetunit(struct raidctl_softc *parent_sc, int id)
-{
-	int i;
-
-	/* A negative id would index sc_raiddevs[] before its start. */
-	if (id < 0 || id >= RF_MAX_ARRAYS)
-		return (-1);
-
-	/* First choice: the requested unit or anything above it. */
-	for (i = id; i < RF_MAX_ARRAYS; i++) {
-		if (parent_sc->sc_raiddevs[i] == NULL)
-			return (i);
-	}
-
-	/* Wrap around: anything below the requested unit. */
-	for (i = 0; i < id; i++) {
-		if (parent_sc->sc_raiddevs[i] == NULL)
-			return (i);
-	}
-
-	return (-1);
-}
-
-/*
- * Shut down every configured array and destroy the control device.
- *
- * Returns 0 on success.  When an array fails to shut down, its error is
- * returned at once and the control device is left in place.  When built
- * with XXX_KTHREAD_EXIT_RACE set, EBUSY is returned instead of shutting
- * anything down whenever arrays are still configured.
- */
-static int
-raidshutdown(void)
-{
-	struct raidctl_softc *parent_sc = raidctl_dev->si_drv1;
-	int error = 0;
-	int i;
-
-	if (parent_sc->sc_numraid != 0) {
-#if XXX_KTHREAD_EXIT_RACE
-		return (EBUSY);
-#else
-		for (i = 0; i < RF_MAX_ARRAYS; i++) {
-			if (parent_sc->sc_raiddevs[i] == NULL)
-				continue;
-
-			rf_printf(0, "Shutting down raid%d\n", i);
-			/* the unit number is passed by address */
-			error = raidctlioctl(raidctl_dev,
-			    RAIDFRAME_SHUTDOWN, (caddr_t)&i, 0, NULL);
-			if (error)
-				return (error);
-
-			/* stop early once the last array is gone */
-			if (parent_sc->sc_numraid == 0)
-				break;
-		}
-#endif
-	}
-
-	destroy_dev(raidctl_dev);
-
-	return (error);
-}
-
-/*
- * Look up the component at (row, col) by its device name and record its
- * sector size, block count, vnode and device in raidPtr.
- *
- * The sector size and media size come from the DIOCGSECTORSIZE and
- * DIOCGMEDIASIZE ioctls; the block count is their quotient, scaled by
- * rf_sizePercentage / 100.
- *
- * Returns 0 on success, otherwise the error from the failing lookup,
- * getattr or ioctl.
- *
- * NOTE(review): reconInProgress is decremented when the lookup or the
- * getattr fails, but not when either ioctl fails; the matching increment is
- * not in this function -- confirm against the callers.  The vnode obtained
- * from raidlookup() is also not released on the later failure paths.
- */
-int
-raid_getcomponentsize(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
-{
- struct vnode *vp;
- struct vattr va;
- RF_Thread_t td;
- off_t mediasize;
- u_int secsize;
- int retcode;
-
- td = raidPtr->engine_thread;
-
- retcode = raidlookup(raidPtr->Disks[row][col].devname, td, &vp);
-
- if (retcode) {
- printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n",raidPtr->raidid,
- raidPtr->Disks[row][col].devname, retcode);
-
- /* XXX the component isn't responding properly...
- must be still dead :-( */
- raidPtr->reconInProgress--;
- return(retcode);
-
- } else {
-
- /* Ok, so we can at least do a lookup...
- How about actually getting a vp for it? */
-
- if ((retcode = VOP_GETATTR(vp, &va, rf_getucred(td),
- td)) != 0) {
- raidPtr->reconInProgress--;
- return(retcode);
- }
-
- retcode = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)&secsize,
- FREAD, rf_getucred(td), td);
- if (retcode)
- return (retcode);
- raidPtr->Disks[row][col].blockSize = secsize;
-
- retcode = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize,
- FREAD, rf_getucred(td), td);
- if (retcode)
- return (retcode);
- /* NOTE(review): secsize is not checked for zero before dividing. */
- raidPtr->Disks[row][col].numBlocks = mediasize / secsize;
-
- raidPtr->raid_cinfo[row][col].ci_vp = vp;
- raidPtr->raid_cinfo[row][col].ci_dev = udev2dev(va.va_rdev);
- raidPtr->Disks[row][col].dev = udev2dev(va.va_rdev);
-
- /* we allow the user to specify that only a
- fraction of the disks should be used this is
- just for debug: it speeds up
- * the parity scan */
- raidPtr->Disks[row][col].numBlocks =
- raidPtr->Disks[row][col].numBlocks *
- rf_sizePercentage / 100;
- }
-
- return(retcode);
-}
-
-/*
- * Module event handler: attach on load, shut everything down on unload or
- * system shutdown, ignore every other event.
- *
- * Returns 0, or the error from raidshutdown().  mod and data are unused.
- */
-static int
-raid_modevent(module_t mod, int type, void *data)
-{
-	if (type == MOD_LOAD) {
-		raidattach();
-		return (0);
-	}
-
-	if (type == MOD_UNLOAD || type == MOD_SHUTDOWN)
-		return (raidshutdown());
-
-	/* any other event is accepted without action */
-	return (0);
-}
-
-/*
- * Module description: the name "raidframe", raid_modevent() as the event
- * handler, and 0 as the third initialiser.
- */
-moduledata_t raid_mod = {
- "raidframe",
- (modeventhand_t) raid_modevent,
- 0};
-
-/* Register the module at SI_SUB_RAID, SI_ORDER_MIDDLE. */
-DECLARE_MODULE(raidframe, raid_mod, SI_SUB_RAID, SI_ORDER_MIDDLE);
diff --git a/sys/dev/raidframe/rf_freelist.h b/sys/dev/raidframe/rf_freelist.h
deleted file mode 100644
index 13a5e83..0000000
--- a/sys/dev/raidframe/rf_freelist.h
+++ /dev/null
@@ -1,702 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_freelist.h,v 1.6 2002/08/08 02:53:01 oster Exp $ */
-/*
- * rf_freelist.h
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_freelist.h -- code to manage counted freelists
- *
- * Keep an arena of fixed-size objects. When a new object is needed,
- * allocate it as necessary. When an object is freed, either put it
- * in the arena, or really free it, depending on the maximum arena
- * size.
- */
-
-#ifndef _RF__RF_FREELIST_H_
-#define _RF__RF_FREELIST_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_threadstuff.h>
-
-/*
- * Set RF_FREELIST_STATS above 0 to compile in per-freelist statistics.
- * With the value 0 every RF_FREELIST_STAT_* macro expands to nothing.
- */
-#define RF_FREELIST_STATS 0
-
-#if RF_FREELIST_STATS > 0
-/* Counters kept per freelist; "file" and "line" identify where
- RF_FREELIST_STAT_INIT was expanded. */
-typedef struct RF_FreeListStats_s {
- char *file;
- int line;
- int allocations;
- int frees;
- int max_free;
- int grows;
- int outstanding;
- int max_outstanding;
-} RF_FreeListStats_t;
-/* Zero the statistics and record the expansion site. */
-#define RF_FREELIST_STAT_INIT(_fl_) { \
- bzero((char *)&((_fl_)->stats), sizeof(RF_FreeListStats_t)); \
- (_fl_)->stats.file = __FILE__; \
- (_fl_)->stats.line = __LINE__; \
-}
-
-/* Count one allocation and track the high-water mark of outstanding
- objects. */
-#define RF_FREELIST_STAT_ALLOC(_fl_) { \
- (_fl_)->stats.allocations++; \
- (_fl_)->stats.outstanding++; \
- if ((_fl_)->stats.outstanding > (_fl_)->stats.max_outstanding) \
- (_fl_)->stats.max_outstanding = (_fl_)->stats.outstanding; \
-}
-
-/* Track the high-water mark of free_cnt. */
-#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) { \
- if ((_fl_)->free_cnt > (_fl_)->stats.max_free) \
- (_fl_)->stats.max_free = (_fl_)->free_cnt; \
-}
-
-/* Count one free. */
-#define RF_FREELIST_STAT_FREE(_fl_) { \
- (_fl_)->stats.frees++; \
- (_fl_)->stats.outstanding--; \
- RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
-}
-
-/* Count one growth of the list. */
-#define RF_FREELIST_STAT_GROW(_fl_) { \
- (_fl_)->stats.grows++; \
- RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
-}
-
-/* Print all counters with printf(). */
-#define RF_FREELIST_STAT_REPORT(_fl_) { \
- printf("Freelist at %s %d (%s)\n", (_fl_)->stats.file, (_fl_)->stats.line, RF_STRING(_fl_)); \
- printf(" %d allocations, %d frees\n", (_fl_)->stats.allocations, (_fl_)->stats.frees); \
- printf(" %d grows\n", (_fl_)->stats.grows); \
- printf(" %d outstanding\n", (_fl_)->stats.outstanding); \
- printf(" %d free (max)\n", (_fl_)->stats.max_free); \
- printf(" %d outstanding (max)\n", (_fl_)->stats.max_outstanding); \
-}
-
-#else /* RF_FREELIST_STATS > 0 */
-
-/* Statistics disabled: all hooks are empty. */
-#define RF_FREELIST_STAT_INIT(_fl_)
-#define RF_FREELIST_STAT_ALLOC(_fl_)
-#define RF_FREELIST_STAT_FREE_UPDATE(_fl_)
-#define RF_FREELIST_STAT_FREE(_fl_)
-#define RF_FREELIST_STAT_GROW(_fl_)
-#define RF_FREELIST_STAT_REPORT(_fl_)
-
-#endif /* RF_FREELIST_STATS > 0 */
-
-/*
- * A counted freelist: a singly linked list of fixed-size free objects,
- * chained through a "next" field that each macro names at its call site.
- */
-struct RF_FreeList_s {
- void *objlist; /* list of free obj */
- int free_cnt; /* how many free obj */
- int max_free_cnt; /* max free arena size */
- int obj_inc; /* how many to allocate at a time */
- int obj_size; /* size of objects */
- RF_DECLARE_MUTEX(lock)
-#if RF_FREELIST_STATS > 0
- RF_FreeListStats_t stats; /* statistics */
-#endif /* RF_FREELIST_STATS > 0 */
-};
-/*
- * Allocate and initialise an empty freelist.
- *
- * fl = freelist (assigned by the macro)
- * maxcnt = max number of items in arena
- * inc = how many to allocate at a time
- * size = size of object
- *
- * On return fl is NULL if the list could not be allocated or its mutex
- * could not be initialised.  The fields and the statistics are only touched
- * while fl is non-NULL.
- */
-#define RF_FREELIST_CREATE(_fl_,_maxcnt_,_inc_,_size_) { \
-	int rc; \
-	RF_ASSERT((_inc_) > 0); \
-	RF_Malloc(_fl_, sizeof(RF_FreeList_t), (RF_FreeList_t *)); \
-	if ((_fl_) != NULL) { \
-		(_fl_)->objlist = NULL; \
-		(_fl_)->free_cnt = 0; \
-		(_fl_)->max_free_cnt = (_maxcnt_); \
-		(_fl_)->obj_inc = (_inc_); \
-		(_fl_)->obj_size = (_size_); \
-		rc = rf_mutex_init(&(_fl_)->lock, "RF_FREELIST"); \
-		if (rc) { \
-			RF_Free(_fl_, sizeof(RF_FreeList_t)); \
-			_fl_ = NULL; \
-		} \
-		else { \
-			RF_FREELIST_STAT_INIT(_fl_); \
-		} \
-	} \
-}
-
-/*
- * Pre-populate a freelist with zeroed objects.
- *
- * fl = freelist
- * cnt = number to prime with
- * nextp = name of "next" pointer in obj
- * cast = object cast
- *
- * Stops at the first failed allocation, so fewer than cnt objects may end
- * up on the list.  The lock is taken separately for each insertion.
- */
-#define RF_FREELIST_PRIME(_fl_,_cnt_,_nextp_,_cast_) { \
- void *_p; \
- int _i; \
- for(_i=0;_i<(_cnt_);_i++) { \
- RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
- if (_p) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- (_cast_(_p))->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = _p; \
- (_fl_)->free_cnt++; \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
- else { \
- break; \
- } \
- } \
- RF_LOCK_MUTEX((_fl_)->lock); \
- RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/* Expands to the freelist's mutex (the "lock" member). */
-#define RF_FREELIST_MUTEX_OF(_fl_) ((_fl_)->lock)
-
-/* Release the freelist's mutex. */
-#define RF_FREELIST_DO_UNLOCK(_fl_) { \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/* Acquire the freelist's mutex. */
-#define RF_FREELIST_DO_LOCK(_fl_) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * Pre-populate a freelist with zeroed, initialised objects.
- *
- * fl = freelist
- * cnt = number to prime with
- * nextp = name of "next" pointer in obj
- * cast = object cast
- * init = func to call to init obj; a non-zero result means failure
- *
- * Stops at the first failed allocation or initialisation.  The init func is
- * only called on objects that were actually allocated.
- */
-#define RF_FREELIST_PRIME_INIT(_fl_,_cnt_,_nextp_,_cast_,_init_) { \
-	void *_p; \
-	int _i; \
-	for(_i=0;_i<(_cnt_);_i++) { \
-		RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
-		if ((_p != NULL) && _init_ (_cast_ _p)) { \
-			RF_Free(_p,(_fl_)->obj_size); \
-			_p = NULL; \
-		} \
-		if (_p) { \
-			RF_LOCK_MUTEX((_fl_)->lock); \
-			(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
-			(_fl_)->objlist = _p; \
-			(_fl_)->free_cnt++; \
-			RF_UNLOCK_MUTEX((_fl_)->lock); \
-		} \
-		else { \
-			break; \
-		} \
-	} \
-	RF_LOCK_MUTEX((_fl_)->lock); \
-	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
-	RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * Pre-populate a freelist with zeroed objects, initialised with an extra
- * argument.
- *
- * fl = freelist
- * cnt = number to prime with
- * nextp = name of "next" pointer in obj
- * cast = object cast
- * init = func to call to init obj; a non-zero result means failure
- * arg = arg to init obj func
- *
- * Stops at the first failed allocation or initialisation.  The init func is
- * only called on objects that were actually allocated.
- */
-#define RF_FREELIST_PRIME_INIT_ARG(_fl_,_cnt_,_nextp_,_cast_,_init_,_arg_) { \
-	void *_p; \
-	int _i; \
-	for(_i=0;_i<(_cnt_);_i++) { \
-		RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
-		if ((_p != NULL) && _init_ (_cast_ _p,_arg_)) { \
-			RF_Free(_p,(_fl_)->obj_size); \
-			_p = NULL; \
-		} \
-		if (_p) { \
-			RF_LOCK_MUTEX((_fl_)->lock); \
-			(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
-			(_fl_)->objlist = _p; \
-			(_fl_)->free_cnt++; \
-			RF_UNLOCK_MUTEX((_fl_)->lock); \
-		} \
-		else { \
-			break; \
-		} \
-	} \
-	RF_LOCK_MUTEX((_fl_)->lock); \
-	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
-	RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * Take an object from the freelist, growing the list when it is empty.
- *
- * fl = freelist
- * obj = object to allocate (assigned by the macro; NULL on failure)
- * nextp = name of "next" pointer in obj
- * cast = cast of obj assignment
- * init = init obj func; a non-zero result means failure
- *
- * When the list is empty, obj is freshly allocated and initialised and up
- * to obj_inc - 1 further objects are allocated, initialised and pushed onto
- * the list.  The lock is not held across the allocations.
- *
- * NOTE(review): the extra objects are pushed without incrementing free_cnt,
- * unlike RF_FREELIST_PRIME -- confirm against the macro that frees objects.
- */
-#define RF_FREELIST_GET_INIT(_fl_,_obj_,_nextp_,_cast_,_init_) { \
-	void *_p; \
-	int _i; \
-	RF_LOCK_MUTEX((_fl_)->lock); \
-	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
-	if ((_fl_)->objlist) { \
-		_obj_ = _cast_((_fl_)->objlist); \
-		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
-		(_fl_)->free_cnt--; \
-		RF_UNLOCK_MUTEX((_fl_)->lock); \
-	} \
-	else { \
-		RF_UNLOCK_MUTEX((_fl_)->lock); \
-		/* \
-		 * Allocate one at a time so we can free \
-		 * one at a time without cleverness when arena \
-		 * is full. \
-		 */ \
-		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
-		if (_obj_) { \
-			if (_init_ (_obj_)) { \
-				RF_Free(_obj_,(_fl_)->obj_size); \
-				_obj_ = NULL; \
-			} \
-			else { \
-				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
-					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
-					if (_p) { \
-						if (_init_ (_p)) { \
-							RF_Free(_p,(_fl_)->obj_size); \
-							_p = NULL; \
-							break; \
-						} \
-						RF_LOCK_MUTEX((_fl_)->lock); \
-						(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
-						(_fl_)->objlist = _p; \
-						RF_UNLOCK_MUTEX((_fl_)->lock); \
-					} \
-					else { \
-						break; \
-					} \
-				} \
-			} \
-		} \
-		RF_LOCK_MUTEX((_fl_)->lock); \
-		RF_FREELIST_STAT_GROW(_fl_); \
-		RF_UNLOCK_MUTEX((_fl_)->lock); \
-	} \
-	RF_LOCK_MUTEX((_fl_)->lock); \
-	RF_FREELIST_STAT_ALLOC(_fl_); \
-	RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * Take an object from the freelist, growing the list when it is empty;
- * new objects are initialised with an extra argument.
- *
- * fl = freelist
- * obj = object to allocate (assigned by the macro; NULL on failure)
- * nextp = name of "next" pointer in obj
- * cast = cast of obj assignment
- * init = init obj func; a non-zero result means failure
- * arg = arg to init obj func
- *
- * When the list is empty, obj is freshly allocated and initialised and up
- * to obj_inc - 1 further objects are allocated, initialised and pushed onto
- * the list.  The lock is not held across the allocations.
- *
- * NOTE(review): the extra objects are pushed without incrementing free_cnt,
- * unlike RF_FREELIST_PRIME -- confirm against the macro that frees objects.
- */
-#define RF_FREELIST_GET_INIT_ARG(_fl_,_obj_,_nextp_,_cast_,_init_,_arg_) { \
-	void *_p; \
-	int _i; \
-	RF_LOCK_MUTEX((_fl_)->lock); \
-	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
-	if ((_fl_)->objlist) { \
-		_obj_ = _cast_((_fl_)->objlist); \
-		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
-		(_fl_)->free_cnt--; \
-		RF_UNLOCK_MUTEX((_fl_)->lock); \
-	} \
-	else { \
-		RF_UNLOCK_MUTEX((_fl_)->lock); \
-		/* \
-		 * Allocate one at a time so we can free \
-		 * one at a time without cleverness when arena \
-		 * is full. \
-		 */ \
-		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
-		if (_obj_) { \
-			if (_init_ (_obj_,_arg_)) { \
-				RF_Free(_obj_,(_fl_)->obj_size); \
-				_obj_ = NULL; \
-			} \
-			else { \
-				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
-					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
-					if (_p) { \
-						if (_init_ (_p,_arg_)) { \
-							RF_Free(_p,(_fl_)->obj_size); \
-							_p = NULL; \
-							break; \
-						} \
-						RF_LOCK_MUTEX((_fl_)->lock); \
-						(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
-						(_fl_)->objlist = _p; \
-						RF_UNLOCK_MUTEX((_fl_)->lock); \
-					} \
-					else { \
-						break; \
-					} \
-				} \
-			} \
-		} \
-		RF_LOCK_MUTEX((_fl_)->lock); \
-		RF_FREELIST_STAT_GROW(_fl_); \
-		RF_UNLOCK_MUTEX((_fl_)->lock); \
-	} \
-	RF_LOCK_MUTEX((_fl_)->lock); \
-	RF_FREELIST_STAT_ALLOC(_fl_); \
-	RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * Like RF_FREELIST_GET_INIT, but the freelist's mutex is acquired at the
- * start and is still held when the macro finishes; the caller must release
- * it.
- *
- * fl = freelist
- * obj = object to allocate (assigned by the macro; NULL on failure)
- * nextp = name of "next" pointer in obj
- * cast = cast of obj assignment
- * init = init obj func; a non-zero result means failure
- *
- * The mutex stays held across the allocations and the init calls.
- *
- * NOTE(review): the extra objects are pushed without incrementing free_cnt,
- * unlike RF_FREELIST_PRIME -- confirm against the macro that frees objects.
- */
-#define RF_FREELIST_GET_INIT_NOUNLOCK(_fl_,_obj_,_nextp_,_cast_,_init_) { \
-	void *_p; \
-	int _i; \
-	RF_LOCK_MUTEX((_fl_)->lock); \
-	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
-	if ((_fl_)->objlist) { \
-		_obj_ = _cast_((_fl_)->objlist); \
-		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
-		(_fl_)->free_cnt--; \
-	} \
-	else { \
-		/* \
-		 * Allocate one at a time so we can free \
-		 * one at a time without cleverness when arena \
-		 * is full. \
-		 */ \
-		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
-		if (_obj_) { \
-			if (_init_ (_obj_)) { \
-				RF_Free(_obj_,(_fl_)->obj_size); \
-				_obj_ = NULL; \
-			} \
-			else { \
-				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
-					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
-					if (_p) { \
-						if (_init_ (_p)) { \
-							RF_Free(_p,(_fl_)->obj_size); \
-							_p = NULL; \
-							break; \
-						} \
-						(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
-						(_fl_)->objlist = _p; \
-					} \
-					else { \
-						break; \
-					} \
-				} \
-			} \
-		} \
-		RF_FREELIST_STAT_GROW(_fl_); \
-	} \
-	RF_FREELIST_STAT_ALLOC(_fl_); \
-}
-
-/*
- * fl = freelist
- * obj = object to allocate
- * nextp = name of "next" pointer in obj
- * cast = cast of obj assignment
- */
-#define RF_FREELIST_GET(_fl_,_obj_,_nextp_,_cast_) { \
- void *_p; \
- int _i; \
- RF_LOCK_MUTEX((_fl_)->lock); \
- RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
- if (_fl_->objlist) { \
- _obj_ = _cast_((_fl_)->objlist); \
- (_fl_)->objlist = (void *)((_obj_)->_nextp_); \
- (_fl_)->free_cnt--; \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
- else { \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- /* \
- * Allocate one at a time so we can free \
- * one at a time without cleverness when arena \
- * is full. \
- */ \
- RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
- if (_obj_) { \
- for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
- RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
- if (_p) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- (_cast_(_p))->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = _p; \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
- else { \
- break; \
- } \
- } \
- } \
- RF_LOCK_MUTEX((_fl_)->lock); \
- RF_FREELIST_STAT_GROW(_fl_); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
- RF_LOCK_MUTEX((_fl_)->lock); \
- RF_FREELIST_STAT_ALLOC(_fl_); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * fl = freelist
- * obj = object to allocate
- * nextp = name of "next" pointer in obj
- * cast = cast of obj assignment
- * num = num objs to return
- */
-#define RF_FREELIST_GET_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \
- void *_p, *_l, *_f; \
- int _i, _n; \
- _l = _f = NULL; \
- _n = 0; \
- RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
- for(_n=0;_n<_num_;_n++) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- if (_fl_->objlist) { \
- _obj_ = _cast_((_fl_)->objlist); \
- (_fl_)->objlist = (void *)((_obj_)->_nextp_); \
- (_fl_)->free_cnt--; \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
- else { \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- /* \
- * Allocate one at a time so we can free \
- * one at a time without cleverness when arena \
- * is full. \
- */ \
- RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
- if (_obj_) { \
- for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
- RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
- if (_p) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- (_cast_(_p))->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = _p; \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
- else { \
- break; \
- } \
- } \
- } \
- RF_LOCK_MUTEX((_fl_)->lock); \
- RF_FREELIST_STAT_GROW(_fl_); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
- RF_LOCK_MUTEX((_fl_)->lock); \
- if (_f == NULL) \
- _f = _obj_; \
- if (_obj_) { \
- (_cast_(_obj_))->_nextp_ = _l; \
- _l = _obj_; \
- RF_FREELIST_STAT_ALLOC(_fl_); \
- } \
- else { \
- (_cast_(_f))->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = _l; \
- _n = _num_; \
- } \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
- } \
-}
-
-/*
- * fl = freelist
- * obj = object to free
- * nextp = name of "next" pointer in obj
- */
-#define RF_FREELIST_FREE(_fl_,_obj_,_nextp_) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \
- RF_Free(_obj_,(_fl_)->obj_size); \
- } \
- else { \
- RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
- (_obj_)->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = (void *)(_obj_); \
- (_fl_)->free_cnt++; \
- } \
- RF_FREELIST_STAT_FREE(_fl_); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * fl = freelist
- * obj = object to free
- * nextp = name of "next" pointer in obj
- * num = num to free (debugging)
- */
-#define RF_FREELIST_FREE_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \
- void *_no; \
- int _n; \
- _n = 0; \
- RF_LOCK_MUTEX((_fl_)->lock); \
- while(_obj_) { \
- _no = (_cast_(_obj_))->_nextp_; \
- if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \
- RF_Free(_obj_,(_fl_)->obj_size); \
- } \
- else { \
- RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
- (_obj_)->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = (void *)(_obj_); \
- (_fl_)->free_cnt++; \
- } \
- _n++; \
- _obj_ = _no; \
- RF_FREELIST_STAT_FREE(_fl_); \
- } \
- RF_ASSERT(_n==(_num_)); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * fl = freelist
- * obj = object to free
- * nextp = name of "next" pointer in obj
- * clean = undo for init
- */
-#define RF_FREELIST_FREE_CLEAN(_fl_,_obj_,_nextp_,_clean_) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \
- _clean_ (_obj_); \
- RF_Free(_obj_,(_fl_)->obj_size); \
- } \
- else { \
- RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
- (_obj_)->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = (void *)(_obj_); \
- (_fl_)->free_cnt++; \
- } \
- RF_FREELIST_STAT_FREE(_fl_); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * fl = freelist
- * obj = object to free
- * nextp = name of "next" pointer in obj
- * clean = undo for init
- * arg = arg for undo func
- */
-#define RF_FREELIST_FREE_CLEAN_ARG(_fl_,_obj_,_nextp_,_clean_,_arg_) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \
- _clean_ (_obj_,_arg_); \
- RF_Free(_obj_,(_fl_)->obj_size); \
- } \
- else { \
- RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
- (_obj_)->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = (void *)(_obj_); \
- (_fl_)->free_cnt++; \
- } \
- RF_FREELIST_STAT_FREE(_fl_); \
- RF_UNLOCK_MUTEX((_fl_)->lock); \
-}
-
-/*
- * fl = freelist
- * obj = object to free
- * nextp = name of "next" pointer in obj
- * clean = undo for init
- */
-#define RF_FREELIST_FREE_CLEAN_NOUNLOCK(_fl_,_obj_,_nextp_,_clean_) { \
- RF_LOCK_MUTEX((_fl_)->lock); \
- if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \
- _clean_ (_obj_); \
- RF_Free(_obj_,(_fl_)->obj_size); \
- } \
- else { \
- RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
- (_obj_)->_nextp_ = (_fl_)->objlist; \
- (_fl_)->objlist = (void *)(_obj_); \
- (_fl_)->free_cnt++; \
- } \
- RF_FREELIST_STAT_FREE(_fl_); \
-}
-
-/*
- * fl = freelist
- * nextp = name of "next" pointer in obj
- * cast = cast to object type
- */
-#define RF_FREELIST_DESTROY(_fl_,_nextp_,_cast_) { \
- void *_cur, *_next; \
- RF_FREELIST_STAT_REPORT(_fl_); \
- rf_mutex_destroy(&((_fl_)->lock)); \
- for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \
- _next = (_cast_ _cur)->_nextp_; \
- RF_Free(_cur,(_fl_)->obj_size); \
- } \
- RF_Free(_fl_,sizeof(RF_FreeList_t)); \
-}
-
-/*
- * fl = freelist
- * nextp = name of "next" pointer in obj
- * cast = cast to object type
- * clean = func to undo obj init
- */
-#define RF_FREELIST_DESTROY_CLEAN(_fl_,_nextp_,_cast_,_clean_) { \
- void *_cur, *_next; \
- RF_FREELIST_STAT_REPORT(_fl_); \
- rf_mutex_destroy(&((_fl_)->lock)); \
- for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \
- _next = (_cast_ _cur)->_nextp_; \
- _clean_ (_cur); \
- RF_Free(_cur,(_fl_)->obj_size); \
- } \
- RF_Free(_fl_,sizeof(RF_FreeList_t)); \
-}
-
-/*
- * fl = freelist
- * nextp = name of "next" pointer in obj
- * cast = cast to object type
- * clean = func to undo obj init
- * arg = arg for undo func
- */
-#define RF_FREELIST_DESTROY_CLEAN_ARG(_fl_,_nextp_,_cast_,_clean_,_arg_) { \
- void *_cur, *_next; \
- RF_FREELIST_STAT_REPORT(_fl_); \
- rf_mutex_destroy(&((_fl_)->lock)); \
- for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \
- _next = (_cast_ _cur)->_nextp_; \
- _clean_ (_cur,_arg_); \
- RF_Free(_cur,(_fl_)->obj_size); \
- } \
- RF_Free(_fl_,sizeof(RF_FreeList_t)); \
-}
-
-#endif /* !_RF__RF_FREELIST_H_ */
diff --git a/sys/dev/raidframe/rf_general.h b/sys/dev/raidframe/rf_general.h
deleted file mode 100644
index e709899..0000000
--- a/sys/dev/raidframe/rf_general.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_general.h,v 1.6 2000/12/15 02:12:58 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_general.h -- some general-use definitions
- */
-
-/*#define NOASSERT*/
-
-#ifndef _RF__RF_GENERAL_H_
-#define _RF__RF_GENERAL_H_
-
-/* error reporting and handling */
-
-#ifdef _KERNEL
-#include<sys/systm.h> /* printf, sprintf, and friends */
-#endif
-
-#define RF_ERRORMSG(s) printf((s))
-#define RF_ERRORMSG1(s,a) printf((s),(a))
-#define RF_ERRORMSG2(s,a,b) printf((s),(a),(b))
-#define RF_ERRORMSG3(s,a,b,c) printf((s),(a),(b),(c))
-
-void rf_print_panic_message(int, char *);
-void rf_print_assert_panic_message(int, char *, char *);
-
-extern char rf_panicbuf[];
-#define RF_PANIC() {rf_print_panic_message(__LINE__,__FILE__); panic(rf_panicbuf);}
-
-#ifdef _KERNEL
-#ifdef RF_ASSERT
-#undef RF_ASSERT
-#endif /* RF_ASSERT */
-#ifndef NOASSERT
-#define RF_ASSERT(_x_) { \
- if (!(_x_)) { \
- rf_print_assert_panic_message(__LINE__, __FILE__, #_x_); \
- panic(rf_panicbuf); \
- } \
-}
-#else /* !NOASSERT */
-#define RF_ASSERT(x) {/*noop*/}
-#endif /* !NOASSERT */
-#else /* _KERNEL */
-#define RF_ASSERT(x) {/*noop*/}
-#endif /* _KERNEL */
-
-/* random stuff */
-#define RF_MAX(a,b) (((a) > (b)) ? (a) : (b))
-#define RF_MIN(a,b) (((a) < (b)) ? (a) : (b))
-
-/* divide-by-zero check */
-#define RF_DB0_CHECK(a,b) ( ((b)==0) ? 0 : (a)/(b) )
-
-/* get time of day */
-#define RF_GETTIME(_t) microtime(&(_t))
-
-/*
- * zero memory- not all bzero calls go through here, only
- * those which in the kernel may have a user address
- */
-
-#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely
- * incorrect. GO */
-
-#if defined(__FreeBSD__)
-#define NBPG PAGE_SIZE
-#endif
-
-#define RF_UL(x) ((unsigned long) (x))
-#define RF_PGMASK RF_UL(NBPG-1)
-#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* bytes left in page */
-#define RF_PAGE_ALIGNED(x) ((RF_UL(x) & RF_PGMASK) == 0)
-
-#ifdef __STDC__
-#define RF_STRING(_str_) #_str_
-#else /* __STDC__ */
-#define RF_STRING(_str_) "_str_"
-#endif /* __STDC__ */
-
-#endif /* !_RF__RF_GENERAL_H_ */
diff --git a/sys/dev/raidframe/rf_geniq.c b/sys/dev/raidframe/rf_geniq.c
deleted file mode 100644
index c21cb1b..0000000
--- a/sys/dev/raidframe/rf_geniq.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/* $NetBSD: rf_geniq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_geniq.c
- * code which implements Reed-Solomon encoding for RAID level 6
- */
-
-
-#define RF_UTILITY 1
-#include <dev/raidframe/rf_pqdeg.h>
-
-/*
- five bit lfsr
- poly - feedback connections
-
- val = value;
-*/
-int
-lsfr_shift(val, poly)
- unsigned val, poly;
-{
- unsigned new;
- unsigned int i;
- unsigned high = (val >> 4) & 1;
- unsigned bit;
-
- new = (poly & 1) ? high : 0;
-
- for (i = 1; i <= 4; i++) {
- bit = (val >> (i - 1)) & 1;
- if (poly & (1 << i)) /* there is a feedback connection */
- new = new | ((bit ^ high) << i);
- else
- new = new | (bit << i);
- }
- return new;
-}
-/* generate Q matricies for the data */
-
-RF_ua32_t rf_qfor[32];
-
-void
-main()
-{
- unsigned int i, j, l, a, b;
- unsigned int val;
- unsigned int r;
- unsigned int m, p, q;
-
- RF_ua32_t k;
-
- printf("/*\n");
- printf(" * rf_invertq.h\n");
- printf(" */\n");
- printf("/*\n");
- printf(" * GENERATED FILE -- DO NOT EDIT\n");
- printf(" */\n");
- printf("\n");
- printf("#ifndef _RF__RF_INVERTQ_H_\n");
- printf("#define _RF__RF_INVERTQ_H_\n");
- printf("\n");
- printf("/*\n");
- printf(" * rf_geniq.c must include rf_archs.h before including\n");
- printf(" * this file (to get VPATH magic right with the way we\n");
- printf(" * generate this file in kernel trees)\n");
- printf(" */\n");
- printf("/* #include \"rf_archs.h\" */\n");
- printf("\n");
- printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n");
- printf("\n");
- printf("#define RF_Q_COLS 32\n");
- printf("RF_ua32_t rf_rn = {\n");
- k[0] = 1;
- for (j = 0; j < 31; j++)
- k[j + 1] = lsfr_shift(k[j], 5);
- for (j = 0; j < 32; j++)
- printf("%d, ", k[j]);
- printf("};\n");
-
- printf("RF_ua32_t rf_qfor[32] = {\n");
- for (i = 0; i < 32; i++) {
- printf("/* i = %d */ { 0, ", i);
- rf_qfor[i][0] = 0;
- for (j = 1; j < 32; j++) {
- val = j;
- for (l = 0; l < i; l++)
- val = lsfr_shift(val, 5);
- rf_qfor[i][j] = val;
- printf("%d, ", val);
- }
- printf("},\n");
- }
- printf("};\n");
- printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n");
-
- /* generate the inverse tables. (i,j,p,q) */
- /* The table just stores a. Get b back from the parity */
- printf("#ifdef KERNEL\n");
- printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n");
- printf("#elif defined(NO_PQ)\n");
- printf("RF_ua1024_t rf_qinv[29*29];\n");
- printf("#else /* !KERNEL && NO_PQ */\n");
- printf("RF_ua1024_t rf_qinv[29*29] = {\n");
- for (i = 0; i < 29; i++) {
- for (j = 0; j < 29; j++) {
- printf("/* i %d, j %d */{ ", i, j);
- if (i == j)
- for (l = 0; l < 1023; l++)
- printf("0, ");
- else {
- for (p = 0; p < 32; p++)
- for (q = 0; q < 32; q++) {
- /* What are a, b such that a ^
- * b = p; and qfor[(28-i)][a
- * ^ rf_rn[i+1]] ^
- * qfor[(28-j)][b ^
- * rf_rn[j+1]] = q. Solve by
- * guessing a. Then testing. */
- for (a = 0; a < 32; a++) {
- b = a ^ p;
- if ((rf_qfor[28 - i][a ^ k[i + 1]] ^ rf_qfor[28 - j][b ^ k[j + 1]]) == q)
- break;
- }
- if (a == 32)
- printf("unable to solve %d %d %d %d\n", i, j, p, q);
- printf("%d,", a);
- }
- }
- printf("},\n");
- }
- }
- printf("};\n");
- printf("\n#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n\n");
- printf("#endif /* !KERNEL && NO_PQ */\n");
- printf("#endif /* !_RF__RF_INVERTQ_H_ */\n");
- exit(0);
-}
diff --git a/sys/dev/raidframe/rf_hist.h b/sys/dev/raidframe/rf_hist.h
deleted file mode 100644
index b8b12c3..0000000
--- a/sys/dev/raidframe/rf_hist.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_hist.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */
-/*
- * rf_hist.h
- *
- * Histgram operations for RAIDframe stats
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_HIST_H_
-#define _RF__RF_HIST_H_
-
-#include <dev/raidframe/rf_types.h>
-
-#define RF_HIST_RESOLUTION 5
-#define RF_HIST_MIN_VAL 0
-#define RF_HIST_MAX_VAL 1000
-#define RF_HIST_RANGE (RF_HIST_MAX_VAL - RF_HIST_MIN_VAL)
-#define RF_HIST_NUM_BUCKETS (RF_HIST_RANGE / RF_HIST_RESOLUTION + 1)
-
-typedef RF_uint32 RF_Hist_t;
-
-#define RF_HIST_ADD(_hist_,_val_) { \
- RF_Hist_t val; \
- val = ((RF_Hist_t)(_val_)) / 1000; \
- if (val >= RF_HIST_MAX_VAL) \
- _hist_[RF_HIST_NUM_BUCKETS-1]++; \
- else \
- _hist_[(val - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \
-}
-
-#endif /* !_RF__RF_HIST_H_ */
diff --git a/sys/dev/raidframe/rf_interdecluster.c b/sys/dev/raidframe/rf_interdecluster.c
deleted file mode 100644
index 8b1dbdb..0000000
--- a/sys/dev/raidframe/rf_interdecluster.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/* $NetBSD: rf_interdecluster.c,v 1.5 2001/01/26 05:09:13 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Khalil Amiri
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/************************************************************
- *
- * rf_interdecluster.c -- implements interleaved declustering
- *
- ************************************************************/
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_INTERDECLUSTER > 0
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_interdecluster.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-
-typedef struct RF_InterdeclusterConfigInfo_s {
- RF_RowCol_t **stripeIdentifier; /* filled in at config time and used
- * by IdentifyStripe */
- RF_StripeCount_t numSparingRegions;
- RF_StripeCount_t stripeUnitsPerSparingRegion;
- RF_SectorNum_t mirrorStripeOffset;
-} RF_InterdeclusterConfigInfo_t;
-
-int
-rf_ConfigureInterDecluster(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_StripeCount_t num_used_stripeUnitsPerDisk;
- RF_InterdeclusterConfigInfo_t *info;
- RF_RowCol_t i, tmp, SUs_per_region;
-
- /* create an Interleaved Declustering configuration structure */
- RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *),
- raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
-
- /* fill in the config structure. */
- SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1);
- info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2, raidPtr->cleanupList);
- if (info->stripeIdentifier == NULL)
- return (ENOMEM);
- for (i = 0; i < SUs_per_region; i++) {
- info->stripeIdentifier[i][0] = i / (raidPtr->numCol - 1);
- tmp = i / raidPtr->numCol;
- info->stripeIdentifier[i][1] = (i + 1 + tmp) % raidPtr->numCol;
- }
-
- /* no spare tables */
- RF_ASSERT(raidPtr->numRow == 1);
-
- /* fill in the remaining layout parameters */
-
- /* total number of stripes should a multiple of 2*numCol: Each sparing
- * region consists of 2*numCol stripes: n-1 primary copy, n-1
- * secondary copy and 2 for spare .. */
- num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
- (2 * raidPtr->numCol));
- info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol);
- /* this is in fact the number of stripe units (that are primary data
- * copies) in the sparing region */
- info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
- info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol + 1);
- layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = 1;
- layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numParityCol = 1;
-
- layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
-
- raidPtr->sectorsPerDisk =
- num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
-
- raidPtr->totalSectors =
- (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
-
- layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
-
- return (0);
-}
-
-int
-rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr)
-{
- return (30);
-}
-
-RF_HeadSepLimit_t
-rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr)
-{
- return (raidPtr->sectorsPerDisk);
-}
-
-RF_ReconUnitCount_t
-rf_GetNumSpareRUsInterDecluster(
- RF_Raid_t * raidPtr)
-{
- RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-
- return (2 * ((RF_ReconUnitCount_t) info->numSparingRegions));
- /* the layout uses two stripe units per disk as spare within each
- * sparing region */
-}
-/* Maps to the primary copy of the data, i.e. the first mirror pair */
-void
-rf_MapSectorInterDecluster(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk;
- RF_StripeNum_t sparing_region_id, index_within_region;
- int col_before_remap;
-
- *row = 0;
- sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
- index_within_region = SUID % info->stripeUnitsPerSparingRegion;
- su_offset_into_disk = index_within_region % (raidPtr->numCol - 1);
- mirror_su_offset_into_disk = index_within_region / raidPtr->numCol;
- col_before_remap = index_within_region / (raidPtr->numCol - 1);
-
- if (!remap) {
- *col = col_before_remap;;
- *diskSector = (su_offset_into_disk + ((raidPtr->numCol - 1) * sparing_region_id)) *
- raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- } else {
- /* remap sector to spare space... */
- *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- *col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol;
- *col = (*col + 1) % raidPtr->numCol;
- if (*col == col_before_remap)
- *col = (*col + 1) % raidPtr->numCol;
- }
-}
-/* Maps to the second copy of the mirror pair. */
-void
-rf_MapParityInterDecluster(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk;
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- int col_before_remap;
-
- sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
- index_within_region = SUID % info->stripeUnitsPerSparingRegion;
- mirror_su_offset_into_disk = index_within_region / raidPtr->numCol;
- col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol;
-
- *row = 0;
- if (!remap) {
- *col = col_before_remap;
- *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += sparing_region_id * (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- } else {
- /* remap parity to spare space ... */
- *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
- *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- *col = index_within_region / (raidPtr->numCol - 1);
- *col = (*col + 1) % raidPtr->numCol;
- if (*col == col_before_remap)
- *col = (*col + 1) % raidPtr->numCol;
- }
-}
-
-void
-rf_IdentifyStripeInterDecluster(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- RF_StripeNum_t SUID;
-
- SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;
- SUID = SUID % info->stripeUnitsPerSparingRegion;
-
- *outRow = 0;
- *diskids = info->stripeIdentifier[SUID];
-}
-
-void
-rf_MapSIDToPSIDInterDecluster(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
-/******************************************************************************
- * select a graph to perform a single-stripe access
- *
- * Parameters: raidPtr - description of the physical array
- * type - type of operation (read or write) requested
- * asmap - logical & physical addresses for this access
- * createFunc - name of function to use to create the graph
- *****************************************************************************/
-
-void
-rf_RAIDIDagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr * createFunc)
-{
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
-
- if (asmap->numDataFailed + asmap->numParityFailed > 1) {
- RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
- *createFunc = NULL;
- return;
- }
- *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
- if (type == RF_IO_TYPE_READ) {
- if (asmap->numDataFailed == 0)
- *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG;
- } else
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
-}
-#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */
diff --git a/sys/dev/raidframe/rf_interdecluster.h b/sys/dev/raidframe/rf_interdecluster.h
deleted file mode 100644
index 9bf3825..0000000
--- a/sys/dev/raidframe/rf_interdecluster.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_interdecluster.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Khalil Amiri
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_interdecluster.h
- * header file for Interleaved Declustering
- */
-
-#ifndef _RF__RF_INTERDECLUSTER_H_
-#define _RF__RF_INTERDECLUSTER_H_
-
-int
-rf_ConfigureInterDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr);
-RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr);
-RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t * raidPtr);
-void
-rf_MapSectorInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-void
-rf_RAIDIDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc);
-
-#endif /* !_RF__RF_INTERDECLUSTER_H_ */
diff --git a/sys/dev/raidframe/rf_invertq.c b/sys/dev/raidframe/rf_invertq.c
deleted file mode 100644
index 66337b6..0000000
--- a/sys/dev/raidframe/rf_invertq.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* $NetBSD: rf_invertq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_pqdeg.h>
-#include <dev/raidframe/rf_invertq.h>
diff --git a/sys/dev/raidframe/rf_invertq.h b/sys/dev/raidframe/rf_invertq.h
deleted file mode 100644
index fde2cae..0000000
--- a/sys/dev/raidframe/rf_invertq.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_invertq.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */
-/*
- * rf_invertq.h
- */
-/*
- * This is normally a generated file. Not so for NetBSD.
- */
-
-#ifndef _RF__RF_INVERTQ_H_
-#define _RF__RF_INVERTQ_H_
-
-/*
- * rf_geniq.c must include rf_archs.h before including
- * this file (to get VPATH magic right with the way we
- * generate this file in kernel trees)
- */
-/* #include <dev/raidframe/rf_archs.h> */
-
-#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
-
-#define RF_Q_COLS 32
-RF_ua32_t rf_rn = {
-1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1,};
-RF_ua32_t rf_qfor[32] = {
- /* i = 0 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,},
- /* i = 1 */ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 5, 7, 1, 3, 13, 15, 9, 11, 21, 23, 17, 19, 29, 31, 25, 27,},
- /* i = 2 */ {0, 4, 8, 12, 16, 20, 24, 28, 5, 1, 13, 9, 21, 17, 29, 25, 10, 14, 2, 6, 26, 30, 18, 22, 15, 11, 7, 3, 31, 27, 23, 19,},
- /* i = 3 */ {0, 8, 16, 24, 5, 13, 21, 29, 10, 2, 26, 18, 15, 7, 31, 23, 20, 28, 4, 12, 17, 25, 1, 9, 30, 22, 14, 6, 27, 19, 11, 3,},
- /* i = 4 */ {0, 16, 5, 21, 10, 26, 15, 31, 20, 4, 17, 1, 30, 14, 27, 11, 13, 29, 8, 24, 7, 23, 2, 18, 25, 9, 28, 12, 19, 3, 22, 6,},
- /* i = 5 */ {0, 5, 10, 15, 20, 17, 30, 27, 13, 8, 7, 2, 25, 28, 19, 22, 26, 31, 16, 21, 14, 11, 4, 1, 23, 18, 29, 24, 3, 6, 9, 12,},
- /* i = 6 */ {0, 10, 20, 30, 13, 7, 25, 19, 26, 16, 14, 4, 23, 29, 3, 9, 17, 27, 5, 15, 28, 22, 8, 2, 11, 1, 31, 21, 6, 12, 18, 24,},
- /* i = 7 */ {0, 20, 13, 25, 26, 14, 23, 3, 17, 5, 28, 8, 11, 31, 6, 18, 7, 19, 10, 30, 29, 9, 16, 4, 22, 2, 27, 15, 12, 24, 1, 21,},
- /* i = 8 */ {0, 13, 26, 23, 17, 28, 11, 6, 7, 10, 29, 16, 22, 27, 12, 1, 14, 3, 20, 25, 31, 18, 5, 8, 9, 4, 19, 30, 24, 21, 2, 15,},
- /* i = 9 */ {0, 26, 17, 11, 7, 29, 22, 12, 14, 20, 31, 5, 9, 19, 24, 2, 28, 6, 13, 23, 27, 1, 10, 16, 18, 8, 3, 25, 21, 15, 4, 30,},
- /* i = 10 */ {0, 17, 7, 22, 14, 31, 9, 24, 28, 13, 27, 10, 18, 3, 21, 4, 29, 12, 26, 11, 19, 2, 20, 5, 1, 16, 6, 23, 15, 30, 8, 25,},
- /* i = 11 */ {0, 7, 14, 9, 28, 27, 18, 21, 29, 26, 19, 20, 1, 6, 15, 8, 31, 24, 17, 22, 3, 4, 13, 10, 2, 5, 12, 11, 30, 25, 16, 23,},
- /* i = 12 */ {0, 14, 28, 18, 29, 19, 1, 15, 31, 17, 3, 13, 2, 12, 30, 16, 27, 21, 7, 9, 6, 8, 26, 20, 4, 10, 24, 22, 25, 23, 5, 11,},
- /* i = 13 */ {0, 28, 29, 1, 31, 3, 2, 30, 27, 7, 6, 26, 4, 24, 25, 5, 19, 15, 14, 18, 12, 16, 17, 13, 8, 20, 21, 9, 23, 11, 10, 22,},
- /* i = 14 */ {0, 29, 31, 2, 27, 6, 4, 25, 19, 14, 12, 17, 8, 21, 23, 10, 3, 30, 28, 1, 24, 5, 7, 26, 16, 13, 15, 18, 11, 22, 20, 9,},
- /* i = 15 */ {0, 31, 27, 4, 19, 12, 8, 23, 3, 28, 24, 7, 16, 15, 11, 20, 6, 25, 29, 2, 21, 10, 14, 17, 5, 26, 30, 1, 22, 9, 13, 18,},
- /* i = 16 */ {0, 27, 19, 8, 3, 24, 16, 11, 6, 29, 21, 14, 5, 30, 22, 13, 12, 23, 31, 4, 15, 20, 28, 7, 10, 17, 25, 2, 9, 18, 26, 1,},
- /* i = 17 */ {0, 19, 3, 16, 6, 21, 5, 22, 12, 31, 15, 28, 10, 25, 9, 26, 24, 11, 27, 8, 30, 13, 29, 14, 20, 7, 23, 4, 18, 1, 17, 2,},
- /* i = 18 */ {0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17, 21, 22, 19, 16, 25, 26, 31, 28, 13, 14, 11, 8, 1, 2, 7, 4,},
- /* i = 19 */ {0, 6, 12, 10, 24, 30, 20, 18, 21, 19, 25, 31, 13, 11, 1, 7, 15, 9, 3, 5, 23, 17, 27, 29, 26, 28, 22, 16, 2, 4, 14, 8,},
- /* i = 20 */ {0, 12, 24, 20, 21, 25, 13, 1, 15, 3, 23, 27, 26, 22, 2, 14, 30, 18, 6, 10, 11, 7, 19, 31, 17, 29, 9, 5, 4, 8, 28, 16,},
- /* i = 21 */ {0, 24, 21, 13, 15, 23, 26, 2, 30, 6, 11, 19, 17, 9, 4, 28, 25, 1, 12, 20, 22, 14, 3, 27, 7, 31, 18, 10, 8, 16, 29, 5,},
- /* i = 22 */ {0, 21, 15, 26, 30, 11, 17, 4, 25, 12, 22, 3, 7, 18, 8, 29, 23, 2, 24, 13, 9, 28, 6, 19, 14, 27, 1, 20, 16, 5, 31, 10,},
- /* i = 23 */ {0, 15, 30, 17, 25, 22, 7, 8, 23, 24, 9, 6, 14, 1, 16, 31, 11, 4, 21, 26, 18, 29, 12, 3, 28, 19, 2, 13, 5, 10, 27, 20,},
- /* i = 24 */ {0, 30, 25, 7, 23, 9, 14, 16, 11, 21, 18, 12, 28, 2, 5, 27, 22, 8, 15, 17, 1, 31, 24, 6, 29, 3, 4, 26, 10, 20, 19, 13,},
- /* i = 25 */ {0, 25, 23, 14, 11, 18, 28, 5, 22, 15, 1, 24, 29, 4, 10, 19, 9, 16, 30, 7, 2, 27, 21, 12, 31, 6, 8, 17, 20, 13, 3, 26,},
- /* i = 26 */ {0, 23, 11, 28, 22, 1, 29, 10, 9, 30, 2, 21, 31, 8, 20, 3, 18, 5, 25, 14, 4, 19, 15, 24, 27, 12, 16, 7, 13, 26, 6, 17,},
- /* i = 27 */ {0, 11, 22, 29, 9, 2, 31, 20, 18, 25, 4, 15, 27, 16, 13, 6, 1, 10, 23, 28, 8, 3, 30, 21, 19, 24, 5, 14, 26, 17, 12, 7,},
- /* i = 28 */ {0, 22, 9, 31, 18, 4, 27, 13, 1, 23, 8, 30, 19, 5, 26, 12, 2, 20, 11, 29, 16, 6, 25, 15, 3, 21, 10, 28, 17, 7, 24, 14,},
- /* i = 29 */ {0, 9, 18, 27, 1, 8, 19, 26, 2, 11, 16, 25, 3, 10, 17, 24, 4, 13, 22, 31, 5, 12, 23, 30, 6, 15, 20, 29, 7, 14, 21, 28,},
- /* i = 30 */ {0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29,},
- /* i = 31 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,},
-};
-#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]
-RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */
-
-#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 >
- * 0) */
-#endif /* !_RF__RF_INVERTQ_H_ */
diff --git a/sys/dev/raidframe/rf_kintf.h b/sys/dev/raidframe/rf_kintf.h
deleted file mode 100644
index ae2697b..0000000
--- a/sys/dev/raidframe/rf_kintf.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_kintf.h,v 1.15 2000/10/20 02:24:45 oster Exp $ */
-/*
- * rf_kintf.h
- *
- * RAIDframe exported kernel interface
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_KINTF_H_
-#define _RF__RF_KINTF_H_
-
-#include <dev/raidframe/rf_types.h>
-
-#if defined(__NetBSD__)
-#define RF_LTSLEEP(cond, pri, text, time, mutex) \
- ltsleep(cond, pri, text, time, mutex)
-#elif defined(__FreeBSD__)
-#if __FreeBSD_version > 500005
-#define RF_LTSLEEP(cond, pri, text, time, mutex) \
- msleep(cond, mutex, pri, text, time);
-#else
-static __inline int
-RF_LTSLEEP(void *cond, int pri, const char *text, int time, struct simplelock *mutex)
-{
- int ret;
- if (mutex != NULL)
- simple_unlock(mutex);
- ret = tsleep(cond, pri, text, time);
- if (mutex != NULL)
- simple_lock(mutex);
- return (ret);
-}
-#endif
-#endif
-
-int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req);
-
-void raidstart(RF_Raid_t * raidPtr);
-int rf_DispatchKernelIO(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req);
-
-int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
-int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
-
-#define RF_NORMAL_COMPONENT_UPDATE 0
-#define RF_FINAL_COMPONENT_UPDATE 1
-void rf_update_component_labels(RF_Raid_t *, int);
-int raidlookup(char *, RF_Thread_t, struct vnode **);
-int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
-int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
-void raid_init_component_label(RF_Raid_t *, RF_ComponentLabel_t *);
-void rf_print_component_label(RF_ComponentLabel_t *);
-void rf_UnconfigureVnodes( RF_Raid_t * );
-void rf_close_component( RF_Raid_t *, struct vnode *, int);
-void rf_disk_unbusy(RF_RaidAccessDesc_t *);
-int raid_getcomponentsize(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
-#endif /* _RF__RF_KINTF_H_ */
diff --git a/sys/dev/raidframe/rf_layout.c b/sys/dev/raidframe/rf_layout.c
deleted file mode 100644
index 53badbd..0000000
--- a/sys/dev/raidframe/rf_layout.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/* $NetBSD: rf_layout.c,v 1.9 2001/01/27 19:34:43 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_layout.c -- driver code dealing with layout and mapping issues
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_decluster.h>
-#include <dev/raidframe/rf_pq.h>
-#include <dev/raidframe/rf_declusterPQ.h>
-#include <dev/raidframe/rf_raid0.h>
-#include <dev/raidframe/rf_raid1.h>
-#include <dev/raidframe/rf_raid4.h>
-#include <dev/raidframe/rf_raid5.h>
-#include <dev/raidframe/rf_states.h>
-#if RF_INCLUDE_RAID5_RS > 0
-#include <dev/raidframe/rf_raid5_rotatedspare.h>
-#endif /* RF_INCLUDE_RAID5_RS > 0 */
-#if RF_INCLUDE_CHAINDECLUSTER > 0
-#include <dev/raidframe/rf_chaindecluster.h>
-#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */
-#if RF_INCLUDE_INTERDECLUSTER > 0
-#include <dev/raidframe/rf_interdecluster.h>
-#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */
-#if RF_INCLUDE_PARITYLOGGING > 0
-#include <dev/raidframe/rf_paritylogging.h>
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
-#if RF_INCLUDE_EVENODD > 0
-#include <dev/raidframe/rf_evenodd.h>
-#endif /* RF_INCLUDE_EVENODD > 0 */
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_driver.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_reconbuffer.h>
-#include <dev/raidframe/rf_reconutil.h>
-
-/***********************************************************************
- *
- * the layout switch defines all the layouts that are supported.
- * fields are: layout ID, init routine, shutdown routine, map
- * sector, map parity, identify stripe, dag selection, map stripeid
- * to parity stripe id (optional), num faults tolerated, special
- * flags.
- *
- ***********************************************************************/
-
-static RF_AccessState_t DefaultStates[] = {rf_QuiesceState,
- rf_IncrAccessesCountState,
- rf_MapState,
- rf_LockState,
- rf_CreateDAGState,
- rf_ExecuteDAGState,
- rf_ProcessDAGState,
- rf_DecrAccessesCountState,
- rf_CleanupState,
- rf_LastState};
-
-#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p
-
-/* Note that if you add any new RAID types to this list, that you must
- also update the mapsw[] table in the raidctl sources */
-
-static RF_LayoutSW_t mapsw[] = {
-#if RF_INCLUDE_PARITY_DECLUSTERING > 0
- /* parity declustering */
- {'T', "Parity declustering",
- RF_NU(
- rf_ConfigureDeclustered,
- rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL,
- rf_IdentifyStripeDeclustered,
- rf_RaidFiveDagSelect,
- rf_MapSIDToPSIDDeclustered,
- rf_GetDefaultHeadSepLimitDeclustered,
- rf_GetDefaultNumFloatingReconBuffersDeclustered,
- NULL, NULL,
- rf_SubmitReconBufferBasic,
- rf_VerifyParityBasic,
- 1,
- DefaultStates,
- 0)
- },
-#endif
-
-#if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0
- /* parity declustering with distributed sparing */
- {'D', "Distributed sparing parity declustering",
- RF_NU(
- rf_ConfigureDeclusteredDS,
- rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL,
- rf_IdentifyStripeDeclustered,
- rf_RaidFiveDagSelect,
- rf_MapSIDToPSIDDeclustered,
- rf_GetDefaultHeadSepLimitDeclustered,
- rf_GetDefaultNumFloatingReconBuffersDeclustered,
- rf_GetNumSpareRUsDeclustered, rf_InstallSpareTable,
- rf_SubmitReconBufferBasic,
- rf_VerifyParityBasic,
- 1,
- DefaultStates,
- RF_DISTRIBUTE_SPARE | RF_BD_DECLUSTERED)
- },
-#endif
-
-#if RF_INCLUDE_DECL_PQ > 0
- /* declustered P+Q */
- {'Q', "Declustered P+Q",
- RF_NU(
- rf_ConfigureDeclusteredPQ,
- rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ,
- rf_IdentifyStripeDeclusteredPQ,
- rf_PQDagSelect,
- rf_MapSIDToPSIDDeclustered,
- rf_GetDefaultHeadSepLimitDeclustered,
- rf_GetDefaultNumFloatingReconBuffersPQ,
- NULL, NULL,
- NULL,
- rf_VerifyParityBasic,
- 2,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_DECL_PQ > 0 */
-
-#if RF_INCLUDE_RAID5_RS > 0
- /* RAID 5 with rotated sparing */
- {'R', "RAID Level 5 rotated sparing",
- RF_NU(
- rf_ConfigureRAID5_RS,
- rf_MapSectorRAID5_RS, rf_MapParityRAID5_RS, NULL,
- rf_IdentifyStripeRAID5_RS,
- rf_RaidFiveDagSelect,
- rf_MapSIDToPSIDRAID5_RS,
- rf_GetDefaultHeadSepLimitRAID5,
- rf_GetDefaultNumFloatingReconBuffersRAID5,
- rf_GetNumSpareRUsRAID5_RS, NULL,
- rf_SubmitReconBufferBasic,
- rf_VerifyParityBasic,
- 1,
- DefaultStates,
- RF_DISTRIBUTE_SPARE)
- },
-#endif /* RF_INCLUDE_RAID5_RS > 0 */
-
-#if RF_INCLUDE_CHAINDECLUSTER > 0
- /* Chained Declustering */
- {'C', "Chained Declustering",
- RF_NU(
- rf_ConfigureChainDecluster,
- rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL,
- rf_IdentifyStripeChainDecluster,
- rf_RAIDCDagSelect,
- rf_MapSIDToPSIDChainDecluster,
- NULL,
- NULL,
- rf_GetNumSpareRUsChainDecluster, NULL,
- rf_SubmitReconBufferBasic,
- rf_VerifyParityBasic,
- 1,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */
-
-#if RF_INCLUDE_INTERDECLUSTER > 0
- /* Interleaved Declustering */
- {'I', "Interleaved Declustering",
- RF_NU(
- rf_ConfigureInterDecluster,
- rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL,
- rf_IdentifyStripeInterDecluster,
- rf_RAIDIDagSelect,
- rf_MapSIDToPSIDInterDecluster,
- rf_GetDefaultHeadSepLimitInterDecluster,
- rf_GetDefaultNumFloatingReconBuffersInterDecluster,
- rf_GetNumSpareRUsInterDecluster, NULL,
- rf_SubmitReconBufferBasic,
- rf_VerifyParityBasic,
- 1,
- DefaultStates,
- RF_DISTRIBUTE_SPARE)
- },
-#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */
-
-#if RF_INCLUDE_RAID0 > 0
- /* RAID level 0 */
- {'0', "RAID Level 0",
- RF_NU(
- rf_ConfigureRAID0,
- rf_MapSectorRAID0, rf_MapParityRAID0, NULL,
- rf_IdentifyStripeRAID0,
- rf_RAID0DagSelect,
- rf_MapSIDToPSIDRAID0,
- NULL,
- NULL,
- NULL, NULL,
- NULL,
- rf_VerifyParityRAID0,
- 0,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_RAID0 > 0 */
-
-#if RF_INCLUDE_RAID1 > 0
- /* RAID level 1 */
- {'1', "RAID Level 1",
- RF_NU(
- rf_ConfigureRAID1,
- rf_MapSectorRAID1, rf_MapParityRAID1, NULL,
- rf_IdentifyStripeRAID1,
- rf_RAID1DagSelect,
- rf_MapSIDToPSIDRAID1,
- NULL,
- NULL,
- NULL, NULL,
- rf_SubmitReconBufferRAID1,
- rf_VerifyParityRAID1,
- 1,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_RAID1 > 0 */
-
-#if RF_INCLUDE_RAID4 > 0
- /* RAID level 4 */
- {'4', "RAID Level 4",
- RF_NU(
- rf_ConfigureRAID4,
- rf_MapSectorRAID4, rf_MapParityRAID4, NULL,
- rf_IdentifyStripeRAID4,
- rf_RaidFiveDagSelect,
- rf_MapSIDToPSIDRAID4,
- rf_GetDefaultHeadSepLimitRAID4,
- rf_GetDefaultNumFloatingReconBuffersRAID4,
- NULL, NULL,
- rf_SubmitReconBufferBasic,
- rf_VerifyParityBasic,
- 1,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_RAID4 > 0 */
-
-#if RF_INCLUDE_RAID5 > 0
- /* RAID level 5 */
- {'5', "RAID Level 5",
- RF_NU(
- rf_ConfigureRAID5,
- rf_MapSectorRAID5, rf_MapParityRAID5, NULL,
- rf_IdentifyStripeRAID5,
- rf_RaidFiveDagSelect,
- rf_MapSIDToPSIDRAID5,
- rf_GetDefaultHeadSepLimitRAID5,
- rf_GetDefaultNumFloatingReconBuffersRAID5,
- NULL, NULL,
- rf_SubmitReconBufferBasic,
- rf_VerifyParityBasic,
- 1,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_RAID5 > 0 */
-
-#if RF_INCLUDE_EVENODD > 0
- /* Evenodd */
- {'E', "EvenOdd",
- RF_NU(
- rf_ConfigureEvenOdd,
- rf_MapSectorRAID5, rf_MapParityEvenOdd, rf_MapEEvenOdd,
- rf_IdentifyStripeEvenOdd,
- rf_EODagSelect,
- rf_MapSIDToPSIDRAID5,
- NULL,
- NULL,
- NULL, NULL,
- NULL, /* no reconstruction, yet */
- rf_VerifyParityEvenOdd,
- 2,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_EVENODD > 0 */
-
-#if RF_INCLUDE_EVENODD > 0
- /* Declustered Evenodd */
- {'e', "Declustered EvenOdd",
- RF_NU(
- rf_ConfigureDeclusteredPQ,
- rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ,
- rf_IdentifyStripeDeclusteredPQ,
- rf_EODagSelect,
- rf_MapSIDToPSIDRAID5,
- rf_GetDefaultHeadSepLimitDeclustered,
- rf_GetDefaultNumFloatingReconBuffersPQ,
- NULL, NULL,
- NULL, /* no reconstruction, yet */
- rf_VerifyParityEvenOdd,
- 2,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_EVENODD > 0 */
-
-#if RF_INCLUDE_PARITYLOGGING > 0
- /* parity logging */
- {'L', "Parity logging",
- RF_NU(
- rf_ConfigureParityLogging,
- rf_MapSectorParityLogging, rf_MapParityParityLogging, NULL,
- rf_IdentifyStripeParityLogging,
- rf_ParityLoggingDagSelect,
- rf_MapSIDToPSIDParityLogging,
- rf_GetDefaultHeadSepLimitParityLogging,
- rf_GetDefaultNumFloatingReconBuffersParityLogging,
- NULL, NULL,
- rf_SubmitReconBufferBasic,
- NULL,
- 1,
- DefaultStates,
- 0)
- },
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
-
- /* end-of-list marker */
- {'\0', NULL,
- RF_NU(
- NULL,
- NULL, NULL, NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL, NULL,
- NULL,
- NULL,
- 0,
- NULL,
- 0)
- }
-};
-
-RF_LayoutSW_t *
-rf_GetLayout(RF_ParityConfig_t parityConfig)
-{
- RF_LayoutSW_t *p;
-
- /* look up the specific layout */
- for (p = &mapsw[0]; p->parityConfig; p++)
- if (p->parityConfig == parityConfig)
- break;
- if (!p->parityConfig)
- return (NULL);
- RF_ASSERT(p->parityConfig == parityConfig);
- return (p);
-}
-
-/*****************************************************************************
- *
- * ConfigureLayout --
- *
- * read the configuration file and set up the RAID layout parameters.
- * After reading common params, invokes the layout-specific
- * configuration routine to finish the configuration.
- *
- ****************************************************************************/
-int
-rf_ConfigureLayout(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_ParityConfig_t parityConfig;
- RF_LayoutSW_t *p;
- int retval;
-
- layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU;
- layoutPtr->SUsPerPU = cfgPtr->SUsPerPU;
- layoutPtr->SUsPerRU = cfgPtr->SUsPerRU;
- parityConfig = cfgPtr->parityConfig;
-
- if (layoutPtr->sectorsPerStripeUnit <= 0) {
- RF_ERRORMSG2("raid%d: Invalid sectorsPerStripeUnit: %d\n",
- raidPtr->raidid,
- (int)layoutPtr->sectorsPerStripeUnit );
- return (EINVAL);
- }
-
- layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
-
- p = rf_GetLayout(parityConfig);
- if (p == NULL) {
- RF_ERRORMSG1("Unknown parity configuration '%c'", parityConfig);
- return (EINVAL);
- }
- RF_ASSERT(p->parityConfig == parityConfig);
- layoutPtr->map = p;
-
- /* initialize the specific layout */
-
- retval = (p->Configure) (listp, raidPtr, cfgPtr);
-
- if (retval)
- return (retval);
-
- layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << raidPtr->logBytesPerSector;
- raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
-
- if (rf_forceNumFloatingReconBufs >= 0) {
- raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs;
- } else {
- raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr);
- }
-
- if (rf_forceHeadSepLimit >= 0) {
- raidPtr->headSepLimit = rf_forceHeadSepLimit;
- } else {
- raidPtr->headSepLimit = rf_GetDefaultHeadSepLimit(raidPtr);
- }
-
- printf("RAIDFRAME: Configure (%s): total number of sectors is %lu (%lu MB)\n",
- layoutPtr->map->configName,
- (unsigned long) raidPtr->totalSectors,
- (unsigned long) (raidPtr->totalSectors / 1024 * (1 << raidPtr->logBytesPerSector) / 1024));
- if (raidPtr->headSepLimit >= 0) {
- printf("RAIDFRAME(%s): Using %ld floating recon bufs with head sep limit %ld\n",
- layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs, (long) raidPtr->headSepLimit);
- } else {
- printf("RAIDFRAME(%s): Using %ld floating recon bufs with no head sep limit\n",
- layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs);
- }
-
- return (0);
-}
-/* typically there is a 1-1 mapping between stripes and parity stripes.
- * however, the declustering code supports packing multiple stripes into
- * a single parity stripe, so as to increase the size of the reconstruction
- * unit without affecting the size of the stripe unit. This routine finds
- * the parity stripe identifier associated with a stripe ID. There is also
- * a RaidAddressToParityStripeID macro in layout.h
- */
-RF_StripeNum_t
-rf_MapStripeIDToParityStripeID(layoutPtr, stripeID, which_ru)
- RF_RaidLayout_t *layoutPtr;
- RF_StripeNum_t stripeID;
- RF_ReconUnitNum_t *which_ru;
-{
- RF_StripeNum_t parityStripeID;
-
- /* quick exit in the common case of SUsPerPU==1 */
- if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) {
- *which_ru = 0;
- return (stripeID);
- } else {
- (layoutPtr->map->MapSIDToPSID) (layoutPtr, stripeID, &parityStripeID, which_ru);
- }
- return (parityStripeID);
-}
diff --git a/sys/dev/raidframe/rf_layout.h b/sys/dev/raidframe/rf_layout.h
deleted file mode 100644
index 2482556..0000000
--- a/sys/dev/raidframe/rf_layout.h
+++ /dev/null
@@ -1,349 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_layout.h,v 1.5 2001/01/26 04:14:14 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_layout.h -- header file defining layout data structures
- */
-
-#ifndef _RF__RF_LAYOUT_H_
-#define _RF__RF_LAYOUT_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_alloclist.h>
-
-#ifndef _KERNEL
-#include <stdio.h>
-#endif
-
-/*****************************************************************************************
- *
- * This structure identifies all layout-specific operations and parameters.
- *
- ****************************************************************************************/
-
-typedef struct RF_LayoutSW_s {
- RF_ParityConfig_t parityConfig;
- const char *configName;
-
-#ifndef _KERNEL
- /* layout-specific parsing */
- int (*MakeLayoutSpecific) (FILE * fp, RF_Config_t * cfgPtr, void *arg);
- void *makeLayoutSpecificArg;
-#endif /* !KERNEL */
-
-#if RF_UTILITY == 0
- /* initialization routine */
- int (*Configure) (RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, RF_Config_t * cfgPtr);
-
- /* routine to map RAID sector address -> physical (row, col, offset) */
- void (*MapSector) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-
- /* routine to map RAID sector address -> physical (r,c,o) of parity
- * unit */
- void (*MapParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-
- /* routine to map RAID sector address -> physical (r,c,o) of Q unit */
- void (*MapQ) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t * row,
- RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-
- /* routine to identify the disks comprising a stripe */
- void (*IdentifyStripe) (RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-
- /* routine to select a dag */
- void (*SelectionFunc) (RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr *);
-#if 0
- void (**createFunc) (RF_Raid_t *,
- RF_AccessStripeMap_t *,
- RF_DagHeader_t *, void *,
- RF_RaidAccessFlags_t,
- RF_AllocListElem_t *);
-
-#endif
-
- /* map a stripe ID to a parity stripe ID. This is typically the
- * identity mapping */
- void (*MapSIDToPSID) (RF_RaidLayout_t * layoutPtr, RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID, RF_ReconUnitNum_t * which_ru);
-
- /* get default head separation limit (may be NULL) */
- RF_HeadSepLimit_t(*GetDefaultHeadSepLimit) (RF_Raid_t * raidPtr);
-
- /* get default num recon buffers (may be NULL) */
- int (*GetDefaultNumFloatingReconBuffers) (RF_Raid_t * raidPtr);
-
- /* get number of spare recon units (may be NULL) */
- RF_ReconUnitCount_t(*GetNumSpareRUs) (RF_Raid_t * raidPtr);
-
- /* spare table installation (may be NULL) */
- int (*InstallSpareTable) (RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol);
-
- /* recon buffer submission function */
- int (*SubmitReconBuffer) (RF_ReconBuffer_t * rbuf, int keep_it,
- int use_committed);
-
- /*
- * verify that parity information for a stripe is correct
- * see rf_parityscan.h for return vals
- */
- int (*VerifyParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr,
- RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
-
- /* number of faults tolerated by this mapping */
- int faultsTolerated;
-
- /* states to step through in an access. Must end with "LastState". The
- * default is DefaultStates in rf_layout.c */
- RF_AccessState_t *states;
-
- RF_AccessStripeMapFlags_t flags;
-#endif /* RF_UTILITY == 0 */
-} RF_LayoutSW_t;
-/* enables remapping to spare location under dist sparing */
-#define RF_REMAP 1
-#define RF_DONT_REMAP 0
-
-/*
- * Flags values for RF_AccessStripeMapFlags_t
- */
-#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */
-#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs
- * that support it */
-#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */
-
-/*************************************************************************
- *
- * this structure forms the layout component of the main Raid
- * structure. It describes everything needed to define and perform
- * the mapping of logical RAID addresses <-> physical disk addresses.
- *
- *************************************************************************/
-struct RF_RaidLayout_s {
- /* configuration parameters */
- RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one
- * stripe unit */
- RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */
- RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction
- * unit */
-
- /* redundant-but-useful info computed from the above, used in all
- * layouts */
- RF_StripeCount_t numStripe; /* total number of stripes in the
- * array */
- RF_SectorCount_t dataSectorsPerStripe;
- RF_StripeCount_t dataStripeUnitsPerDisk;
- u_int bytesPerStripeUnit;
- u_int dataBytesPerStripe;
- RF_StripeCount_t numDataCol; /* number of SUs of data per stripe
- * (name here is a la RAID4) */
- RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe.
- * Always 1 for now */
- RF_StripeCount_t numParityLogCol; /* number of SUs of parity log
- * per stripe. Always 1 for
- * now */
- RF_StripeCount_t stripeUnitsPerDisk;
-
- RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and
- * information */
- void *layoutSpecificInfo; /* ptr to a structure holding
- * layout-specific params */
-};
-/*****************************************************************************************
- *
- * The mapping code returns a pointer to a list of AccessStripeMap structures, which
- * describes all the mapping information about an access. The list contains one
- * AccessStripeMap structure per stripe touched by the access. Each element in the list
- * contains a stripe identifier and a pointer to a list of PhysDiskAddr structuress. Each
- * element in this latter list describes the physical location of a stripe unit accessed
- * within the corresponding stripe.
- *
- ****************************************************************************************/
-
-#define RF_PDA_TYPE_DATA 0
-#define RF_PDA_TYPE_PARITY 1
-#define RF_PDA_TYPE_Q 2
-
-struct RF_PhysDiskAddr_s {
- RF_RowCol_t row, col; /* disk identifier */
- RF_SectorNum_t startSector; /* sector offset into the disk */
- RF_SectorCount_t numSector; /* number of sectors accessed */
- int type; /* used by higher levels: currently, data,
- * parity, or q */
- caddr_t bufPtr; /* pointer to buffer supplying/receiving data */
- RF_RaidAddr_t raidAddress; /* raid address corresponding to this
- * physical disk address */
- RF_PhysDiskAddr_t *next;
-};
-#define RF_MAX_FAILED_PDA RF_MAXCOL
-
-struct RF_AccessStripeMap_s {
- RF_StripeNum_t stripeID;/* the stripe index */
- RF_RaidAddr_t raidAddress; /* the starting raid address within
- * this stripe */
- RF_RaidAddr_t endRaidAddress; /* raid address one sector past the
- * end of the access */
- RF_SectorCount_t totalSectorsAccessed; /* total num sectors
- * identified in physInfo list */
- RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in
- * physInfo list */
- int numDataFailed; /* number of failed data disks accessed */
- int numParityFailed;/* number of failed parity disks accessed (0
- * or 1) */
- int numQFailed; /* number of failed Q units accessed (0 or 1) */
- RF_AccessStripeMapFlags_t flags; /* various flags */
-#if 0
- RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */
- RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA
- * that has failed, if any */
-#else
- int numFailedPDAs; /* number of failed phys addrs */
- RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys
- * addrs */
-#endif
- RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */
- RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the
- * parity (P of P + Q ) */
- RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of
- * P + Q */
- RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */
- RF_RowCol_t origRow; /* the original row: we may redirect the acc
- * to a different row */
- RF_AccessStripeMap_t *next;
-};
-/* flag values */
-#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* allows large-write creation
- * code to redirect failed
- * accs */
-#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* allows us to detect
- * recursive calls to the
- * bailout write dag */
-#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* we've acquired the lock on
- * the first parity range in
- * this parity stripe */
-#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* we've acquired the lock on
- * the 2nd parity range in
- * this parity stripe */
-#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* we've done the force-recon
- * call on this parity stripe */
-#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* we blocked recon => we must
- * unblock it later */
-
-/*
- * Head of the per-stripe map list built for one access. `numStripes` counts
- * the entries reachable from `stripeMap`; `next` lets headers themselves be
- * chained.
- */
-struct RF_AccessStripeMapHeader_s {
- RF_StripeCount_t numStripes; /* total number of stripes touched by
- * this acc */
- RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map.
- * Also used for making lists */
- RF_AccessStripeMapHeader_t *next;
-};
-/*****************************************************************************************
- *
- * various routines mapping addresses in the RAID address space. These work across
- * all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE.
- *
- ****************************************************************************************/
-
-/* NOTE: several of the macros below expand _layoutPtr_ more than once, so the
- * argument passed for it must be free of side effects. */
-
-/* return the identifier of the stripe containing the given address */
-#define rf_RaidAddressToStripeID(_layoutPtr_, _addr_) \
- ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) / (_layoutPtr_)->numDataCol )
-
-/* return the raid address of the start of the indicated stripe ID */
-#define rf_StripeIDToRaidAddress(_layoutPtr_, _sid_) \
- ( ((_sid_) * (_layoutPtr_)->sectorsPerStripeUnit) * (_layoutPtr_)->numDataCol )
-
-/* return the identifier of the stripe containing the given stripe unit id */
-#define rf_StripeUnitIDToStripeID(_layoutPtr_, _addr_) \
- ( (_addr_) / (_layoutPtr_)->numDataCol )
-
-/* return the identifier of the stripe unit containing the given address */
-#define rf_RaidAddressToStripeUnitID(_layoutPtr_, _addr_) \
- ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) )
-
-/* return the RAID address of next stripe boundary beyond the given address
- * (an address that already lies on a boundary yields the following boundary) */
-#define rf_RaidAddressOfNextStripeBoundary(_layoutPtr_, _addr_) \
- ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+1) * (_layoutPtr_)->dataSectorsPerStripe )
-
-/* return the RAID address of the start of the stripe containing the given address */
-#define rf_RaidAddressOfPrevStripeBoundary(_layoutPtr_, _addr_) \
- ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+0) * (_layoutPtr_)->dataSectorsPerStripe )
-
-/* return the RAID address of next stripe unit boundary beyond the given address
- * (an address that already lies on a boundary yields the following boundary) */
-#define rf_RaidAddressOfNextStripeUnitBoundary(_layoutPtr_, _addr_) \
- ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+1L)*(_layoutPtr_)->sectorsPerStripeUnit )
-
-/* return the RAID address of the start of the stripe unit containing RAID address _addr_ */
-#define rf_RaidAddressOfPrevStripeUnitBoundary(_layoutPtr_, _addr_) \
- ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+0)*(_layoutPtr_)->sectorsPerStripeUnit )
-
-/* returns the offset into the stripe. used by RaidAddressStripeAligned */
-#define rf_RaidAddressStripeOffset(_layoutPtr_, _addr_) \
- ( (_addr_) % ((_layoutPtr_)->dataSectorsPerStripe) )
-
-/* returns the offset into the stripe unit. used by StripeUnitAligned */
-#define rf_StripeUnitOffset(_layoutPtr_, _addr_) \
- ( (_addr_) % ((_layoutPtr_)->sectorsPerStripeUnit) )
-
-/* returns nonzero if the given RAID address is stripe-aligned */
-#define rf_RaidAddressStripeAligned( __layoutPtr__, __addr__ ) \
- ( rf_RaidAddressStripeOffset(__layoutPtr__, __addr__) == 0 )
-
-/* returns nonzero if the given address is stripe-unit aligned */
-#define rf_StripeUnitAligned( __layoutPtr__, __addr__ ) \
- ( rf_StripeUnitOffset(__layoutPtr__, __addr__) == 0 )
-
-/* convert an address expressed in RAID blocks to/from an addr expressed in bytes
- * by shifting by the array's logBytesPerSector */
-#define rf_RaidAddressToByte(_raidPtr_, _addr_) \
- ( (_addr_) << ( (_raidPtr_)->logBytesPerSector ) )
-
-#define rf_ByteToRaidAddress(_raidPtr_, _addr_) \
- ( (_addr_) >> ( (_raidPtr_)->logBytesPerSector ) )
-
-/* convert a raid address to/from a parity stripe ID. Conversion to raid address is easy,
- * since we're asking for the address of the first sector in the parity stripe. Conversion to a
- * parity stripe ID is more complex, since stripes are not contiguously allocated in
- * parity stripes.
- */
-#define rf_RaidAddressToParityStripeID(_layoutPtr_, _addr_, _ru_num_) \
- rf_MapStripeIDToParityStripeID( (_layoutPtr_), rf_RaidAddressToStripeID( (_layoutPtr_), (_addr_) ), (_ru_num_) )
-
-#define rf_ParityStripeIDToRaidAddress(_layoutPtr_, _psid_) \
- ( (_psid_) * (_layoutPtr_)->SUsPerPU * (_layoutPtr_)->numDataCol * (_layoutPtr_)->sectorsPerStripeUnit )
-
-RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig);
-int
-rf_ConfigureLayout(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-RF_StripeNum_t
-rf_MapStripeIDToParityStripeID(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_ReconUnitNum_t * which_ru);
-
-#endif /* !_RF__RF_LAYOUT_H_ */
diff --git a/sys/dev/raidframe/rf_map.c b/sys/dev/raidframe/rf_map.c
deleted file mode 100644
index 22af549..0000000
--- a/sys/dev/raidframe/rf_map.c
+++ /dev/null
@@ -1,909 +0,0 @@
-/* $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/**************************************************************************
- *
- * map.c -- main code for mapping RAID addresses to physical disk addresses
- *
- **************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-static void rf_FreePDAList(RF_PhysDiskAddr_t * start, RF_PhysDiskAddr_t * end, int count);
-static void
-rf_FreeASMList(RF_AccessStripeMap_t * start, RF_AccessStripeMap_t * end,
- int count);
-
-/*****************************************************************************************
- *
- * MapAccess -- main 1st order mapping routine.
- *
- * Maps an access in the RAID address space to the corresponding set of physical disk
- * addresses. The result is returned as a list of AccessStripeMap structures, one per
- * stripe accessed. Each ASM structure contains a pointer to a list of PhysDiskAddr
- * structures, which describe the physical locations touched by the user access. Note
- * that this routine returns only static mapping information, i.e. the list of physical
- * addresses returned does not necessarily identify the set of physical locations that
- * will actually be read or written.
- *
- * The routine also maps the parity. The physical disk location returned always
- * indicates the entire parity unit, even when only a subset of it is being accessed.
- * This is because an access that is not stripe unit aligned but that spans a stripe
- * unit boundary may require access to two distinct portions of the parity unit, and we
- * can't yet tell which portion(s) we'll actually need. We leave it up to the algorithm
- * selection code to decide what subset of the parity unit to access.
- *
- * Note that addresses in the RAID address space must always be maintained as
- * longs, instead of ints.
- *
- * Returns NULL, after logging an error, when the access runs past
- * raidPtr->totalSectors. That check is made before anything is taken from the
- * free lists, so the error return does not strand list elements.
- *
- * NOTE(review): this comment used to say that NULL is returned when numBlocks is 0,
- * but nothing in the routine implements that; callers are presumably expected
- * never to pass 0 -- confirm.
- *
- ****************************************************************************************/
-
-RF_AccessStripeMapHeader_t *
-rf_MapAccess(
-    RF_Raid_t *raidPtr,
-    RF_RaidAddr_t raidAddress,      /* starting address in RAID address space */
-    RF_SectorCount_t numBlocks,     /* number of blocks in RAID address space to access */
-    caddr_t buffer,                 /* buffer to supply/receive data */
-    int remap)                      /* 1 => remap addresses to spare space */
-{
-    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-    RF_AccessStripeMapHeader_t *asm_hdr = NULL;
-    RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL;
-    int     faultsTolerated = layoutPtr->map->faultsTolerated;
-    RF_RaidAddr_t startAddress = raidAddress;   /* we'll change raidAddress
-                                                 * along the way */
-    RF_RaidAddr_t endAddress = raidAddress + numBlocks;
-    RF_RaidDisk_t **disks = raidPtr->Disks;
-
-    RF_PhysDiskAddr_t *pda_p, *pda_q;
-    RF_StripeCount_t numStripes = 0;
-    RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress;
-    RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr;
-    RF_StripeCount_t totStripes;
-    RF_StripeNum_t stripeID, lastSID, SUID, lastSUID;
-    RF_AccessStripeMap_t *asmList, *t_asm;
-    RF_PhysDiskAddr_t *pdaList, *t_pda;
-
-    /* Validate the range first: bailing out after the bulk allocations
-     * below would leave the ASM and PDA lists unreturned. */
-    if (raidAddress + numBlocks > raidPtr->totalSectors) {
-        RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n",
-            (int) raidAddress);
-        return (NULL);
-    }
-
-    /* allocate all the ASMs and PDAs up front */
-    lastRaidAddr = raidAddress + numBlocks - 1;
-    stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress);
-    lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr);
-    totStripes = lastSID - stripeID + 1;
-    SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress);
-    lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr);
-
-    asmList = rf_AllocASMList(totStripes);
-    /* one PDA per stripe unit touched, plus faultsTolerated parity PDA(s)
-     * per stripe */
-    pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes);
-
-    if (rf_mapDebug)
-        rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks);
-    for (; raidAddress < endAddress;) {
-        /* make the next stripe structure */
-        RF_ASSERT(asmList);
-        t_asm = asmList;
-        asmList = asmList->next;
-        bzero((char *) t_asm, sizeof(RF_AccessStripeMap_t));
-        if (!asm_p)
-            asm_list = asm_p = t_asm;
-        else {
-            asm_p->next = t_asm;
-            asm_p = asm_p->next;
-        }
-        numStripes++;
-
-        /* map SUs from current location to the end of the stripe */
-        asm_p->stripeID = stripeID++;
-        stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress);
-        stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress);
-        asm_p->raidAddress = raidAddress;
-        asm_p->endRaidAddress = stripeEndAddress;
-
-        /* map each stripe unit in the stripe */
-        pda_p = NULL;
-        /* Raid addr of start of portion of access that is within this stripe */
-        startAddrWithinStripe = raidAddress;
-        for (; raidAddress < stripeEndAddress;) {
-            RF_ASSERT(pdaList);
-            t_pda = pdaList;
-            pdaList = pdaList->next;
-            bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
-            if (!pda_p)
-                asm_p->physInfo = pda_p = t_pda;
-            else {
-                pda_p->next = t_pda;
-                pda_p = pda_p->next;
-            }
-
-            pda_p->type = RF_PDA_TYPE_DATA;
-            (layoutPtr->map->MapSector) (raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
-
-            /* mark any failures we find. failedPDA is don't-care
-             * if there is more than one failure */
-            /* the RAID address corresponding to this physical disk address */
-            pda_p->raidAddress = raidAddress;
-            nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress);
-            pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress;
-            RF_ASSERT(pda_p->numSector != 0);
-            rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0);
-            pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress));
-            asm_p->totalSectorsAccessed += pda_p->numSector;
-            asm_p->numStripeUnitsAccessed++;
-            /* redundant but harmless to do this in every loop iteration */
-            asm_p->origRow = pda_p->row;
-
-            raidAddress = RF_MIN(endAddress, nextStripeUnitAddress);
-        }
-
-        /* Map the parity. At this stage, the startSector and
-         * numSector fields for the parity unit are always set to
-         * indicate the entire parity unit. We may modify this after
-         * mapping the data portion. */
-        switch (faultsTolerated) {
-        case 0:
-            break;
-        case 1: /* single fault tolerant */
-            RF_ASSERT(pdaList);
-            t_pda = pdaList;
-            pdaList = pdaList->next;
-            bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
-            pda_p = asm_p->parityInfo = t_pda;
-            pda_p->type = RF_PDA_TYPE_PARITY;
-            (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
-                &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
-            pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
-            /* raidAddr may be needed to find unit to redirect to */
-            pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
-            rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
-            rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p);
-
-            break;
-        case 2: /* two fault tolerant */
-            RF_ASSERT(pdaList && pdaList->next);
-            t_pda = pdaList;
-            pdaList = pdaList->next;
-            bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
-            pda_p = asm_p->parityInfo = t_pda;
-            pda_p->type = RF_PDA_TYPE_PARITY;
-            t_pda = pdaList;
-            pdaList = pdaList->next;
-            bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
-            pda_q = asm_p->qInfo = t_pda;
-            pda_q->type = RF_PDA_TYPE_Q;
-            (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
-                &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
-            (layoutPtr->map->MapQ) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
-                &(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap);
-            pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
-            /* raidAddr may be needed to find unit to redirect to */
-            pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
-            pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
-            /* failure mode stuff */
-            rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
-            rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1);
-            rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p);
-            rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p);
-            break;
-        }
-    }
-    /* every pre-allocated element must have been consumed */
-    RF_ASSERT(asmList == NULL && pdaList == NULL);
-    /* make the header structure */
-    asm_hdr = rf_AllocAccessStripeMapHeader();
-    RF_ASSERT(numStripes == totStripes);
-    asm_hdr->numStripes = numStripes;
-    asm_hdr->stripeMap = asm_list;
-
-    if (rf_mapDebug)
-        rf_PrintAccessStripeMap(asm_hdr);
-    return (asm_hdr);
-}
-/*****************************************************************************************
- * This routine walks through an ASM list and marks the PDAs that have failed.
- * It's called only when a disk failure causes an in-flight DAG to fail.
- * The parity may consist of two components, but we want to use only one failedPDA
- * pointer. Thus we set failedPDA to point to the first parity component, and rely
- * on the rest of the code to do the right thing with this.
- ****************************************************************************************/
-
-/*
- * Recompute the failure bookkeeping of every stripe map under asm_h from the
- * current disk status table: counters are reset, failedPDAs[] is cleared, and
- * each data, parity and Q PDA that sits on a dead disk is recorded again.
- */
-void
-rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h)
-{
-    RF_RaidDisk_t **diskTable = raidPtr->Disks;
-    RF_AccessStripeMap_t *sm;
-    RF_PhysDiskAddr_t *cur;
-
-    sm = asm_h->stripeMap;
-    while (sm != NULL) {
-        /* forget whatever was recorded before */
-        sm->numQFailed = 0;
-        sm->numParityFailed = 0;
-        sm->numDataFailed = 0;
-        sm->numFailedPDAs = 0;
-        bzero((char *) sm->failedPDAs,
-            RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *));
-
-        /* data units */
-        for (cur = sm->physInfo; cur != NULL; cur = cur->next) {
-            if (RF_DEAD_DISK(diskTable[cur->row][cur->col].status)) {
-                sm->numDataFailed++;
-                sm->failedPDAs[sm->numFailedPDAs++] = cur;
-            }
-        }
-
-        /* parity unit: only the head of the list is examined */
-        cur = sm->parityInfo;
-        if (cur != NULL && RF_DEAD_DISK(diskTable[cur->row][cur->col].status)) {
-            sm->numParityFailed++;
-            sm->failedPDAs[sm->numFailedPDAs++] = cur;
-        }
-
-        /* Q unit: likewise, head of the list only */
-        cur = sm->qInfo;
-        if (cur != NULL && RF_DEAD_DISK(diskTable[cur->row][cur->col].status)) {
-            sm->numQFailed++;
-            sm->failedPDAs[sm->numFailedPDAs++] = cur;
-        }
-
-        sm = sm->next;
-    }
-}
-/*****************************************************************************************
- *
- * DuplicateASM -- duplicates an ASM and returns the new one
- *
- ****************************************************************************************/
-RF_AccessStripeMap_t *
-rf_DuplicateASM(asmap)
- RF_AccessStripeMap_t *asmap;
-{
- RF_AccessStripeMap_t *new_asm;
- RF_PhysDiskAddr_t *pda, *new_pda, *t_pda;
-
- new_pda = NULL;
- new_asm = rf_AllocAccessStripeMapComponent();
- /* begin with a field-for-field copy, then detach the pointers that are
- * rebuilt below */
- bcopy((char *) asmap, (char *) new_asm, sizeof(RF_AccessStripeMap_t));
- new_asm->numFailedPDAs = 0; /* ??? */
- new_asm->failedPDAs[0] = NULL;
- new_asm->physInfo = NULL;
- new_asm->parityInfo = NULL;
- new_asm->next = NULL;
- /* NOTE(review): qInfo and failedPDAs[1..] are neither cleared nor
- * duplicated, so after the bcopy they still point at the original's
- * PDAs. Confirm no caller frees both the original and the duplicate
- * when a Q list is present. */
-
- for (pda = asmap->physInfo; pda; pda = pda->next) { /* copy the physInfo
- * list */
- t_pda = rf_AllocPhysDiskAddr();
- bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
- t_pda->next = NULL;
- /* append: new_pda tracks the tail of the list being built */
- if (!new_asm->physInfo) {
- new_asm->physInfo = t_pda;
- new_pda = t_pda;
- } else {
- new_pda->next = t_pda;
- new_pda = new_pda->next;
- }
- /* carry the first failed-PDA pointer over to the matching copy */
- if (pda == asmap->failedPDAs[0])
- new_asm->failedPDAs[0] = t_pda;
- }
- for (pda = asmap->parityInfo; pda; pda = pda->next) { /* copy the parityInfo
- * list */
- t_pda = rf_AllocPhysDiskAddr();
- bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
- t_pda->next = NULL;
- /* new_pda is re-seeded by the first element, so the tail left over
- * from the physInfo loop is never appended to here */
- if (!new_asm->parityInfo) {
- new_asm->parityInfo = t_pda;
- new_pda = t_pda;
- } else {
- new_pda->next = t_pda;
- new_pda = new_pda->next;
- }
- if (pda == asmap->failedPDAs[0])
- new_asm->failedPDAs[0] = t_pda;
- }
- return (new_asm);
-}
-/*****************************************************************************************
- *
- * DuplicatePDA -- duplicates a PDA and returns the new one
- *
- ****************************************************************************************/
-/* Return a freshly allocated PDA holding a copy of every field of *pda,
- * the `next` link included. */
-RF_PhysDiskAddr_t *
-rf_DuplicatePDA(RF_PhysDiskAddr_t *pda)
-{
-    RF_PhysDiskAddr_t *copy = rf_AllocPhysDiskAddr();
-
-    *copy = *pda;   /* structure assignment */
-    return (copy);
-}
-/*****************************************************************************************
- *
- * routines to allocate and free list elements. The single-element allocation
- * routines zero the structure before returning it; the list allocators
- * (AllocPDAList, AllocASMList) do not do so themselves.
- *
- * FreePhysDiskAddr should rarely need to be called directly, because
- * FreeAccessStripeMap takes care of freeing the PhysDiskAddr list.
- *
- ****************************************************************************************/
-
-static RF_FreeList_t *rf_asmhdr_freelist;
-#define RF_MAX_FREE_ASMHDR 128
-#define RF_ASMHDR_INC 16
-#define RF_ASMHDR_INITIAL 32
-
-static RF_FreeList_t *rf_asm_freelist;
-#define RF_MAX_FREE_ASM 192
-#define RF_ASM_INC 24
-#define RF_ASM_INITIAL 64
-
-static RF_FreeList_t *rf_pda_freelist;
-#define RF_MAX_FREE_PDA 192
-#define RF_PDA_INC 24
-#define RF_PDA_INITIAL 64
-
-/*
- * Shutdown hook for the map module: tears down the three free lists that
- * rf_ConfigureMapModule created. The argument is unused.
- */
-static void rf_ShutdownMapModule(void *);
-
-static void
-rf_ShutdownMapModule(void *ignored)
-{
-    RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *));
-    RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *));
-    RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *));
-}
-
-/*
- * Create and prime the free lists used for ASM headers, ASMs and PDAs, and
- * register rf_ShutdownMapModule on the shutdown list so they are released
- * again later.
- *
- * Returns 0 on success, ENOMEM if a free list cannot be created (any lists
- * already created are destroyed first), or the non-zero code returned by
- * rf_ShutdownCreate.
- */
-int
-rf_ConfigureMapModule(RF_ShutdownList_t **listp)
-{
-    int     rc;
-
-    RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR,
-        RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t));
-    if (rf_asmhdr_freelist == NULL) {
-        return (ENOMEM);
-    }
-    RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM,
-        RF_ASM_INC, sizeof(RF_AccessStripeMap_t));
-    if (rf_asm_freelist == NULL) {
-        RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *));
-        return (ENOMEM);
-    }
-    RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA,
-        RF_PDA_INC, sizeof(RF_PhysDiskAddr_t));
-    if (rf_pda_freelist == NULL) {
-        /* unwind the two lists that do exist; the PDA list itself was
-         * never created, so it must not be passed to DESTROY */
-        RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *));
-        RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *));
-        return (ENOMEM);
-    }
-    rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL);
-    if (rc) {
-        RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
-            __LINE__, rc);
-        /* nothing will call the hook for us, so release the lists now */
-        rf_ShutdownMapModule(NULL);
-        return (rc);
-    }
-    RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next,
-        (RF_AccessStripeMapHeader_t *));
-    RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next,
-        (RF_AccessStripeMap_t *));
-    RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next,
-        (RF_PhysDiskAddr_t *));
-
-    return (0);
-}
-
-/* Take one ASM header from its free list and return it zero-filled. */
-RF_AccessStripeMapHeader_t *
-rf_AllocAccessStripeMapHeader(void)
-{
-    RF_AccessStripeMapHeader_t *hdr;
-
-    RF_FREELIST_GET(rf_asmhdr_freelist, hdr, next, (RF_AccessStripeMapHeader_t *));
-    bzero((char *) hdr, sizeof(*hdr));
-    return (hdr);
-}
-
-
-/* Give a single ASM header back to its free list. */
-void
-rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p)
-{
-    RF_FREELIST_FREE(rf_asmhdr_freelist, p, next);
-}
-
-/* Take one PDA from its free list and return it zero-filled. */
-RF_PhysDiskAddr_t *
-rf_AllocPhysDiskAddr(void)
-{
-    RF_PhysDiskAddr_t *pda;
-
-    RF_FREELIST_GET(rf_pda_freelist, pda, next, (RF_PhysDiskAddr_t *));
-    bzero((char *) pda, sizeof(*pda));
-    return (pda);
-}
-/*
- * Obtain `count` PDAs with a single RF_FREELIST_GET_N request and return the
- * head of the resulting `next`-linked list. The elements are not cleared
- * here; rf_MapAccess zeroes each one as it consumes it.
- */
-RF_PhysDiskAddr_t *
-rf_AllocPDAList(int count)
-{
-    RF_PhysDiskAddr_t *head = NULL;
-
-    RF_FREELIST_GET_N(rf_pda_freelist, head, next, (RF_PhysDiskAddr_t *), count);
-    return (head);
-}
-
-/* Give a single PDA back to its free list. */
-void
-rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p)
-{
-    RF_FREELIST_FREE(rf_pda_freelist, p, next);
-}
-
-/*
- * Return a `next`-linked run of `count` PDAs starting at l_start to the free
- * list in one RF_FREELIST_FREE_N request. l_end is accepted for symmetry
- * with the caller's bookkeeping but is not used here.
- */
-static void
-rf_FreePDAList(RF_PhysDiskAddr_t *l_start, RF_PhysDiskAddr_t *l_end, int count)
-{
-    RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next, (RF_PhysDiskAddr_t *), count);
-}
-
-/* Take one ASM from its free list and return it zero-filled. */
-RF_AccessStripeMap_t *
-rf_AllocAccessStripeMapComponent(void)
-{
-    RF_AccessStripeMap_t *sm;
-
-    RF_FREELIST_GET(rf_asm_freelist, sm, next, (RF_AccessStripeMap_t *));
-    bzero((char *) sm, sizeof(*sm));
-    return (sm);
-}
-/*
- * ASM counterpart of rf_AllocPDAList: obtain `count` ASMs with a single
- * RF_FREELIST_GET_N request and return the head of the `next`-linked list.
- * The elements are not cleared here; rf_MapAccess zeroes each one as it
- * consumes it.
- */
-RF_AccessStripeMap_t *
-rf_AllocASMList(int count)
-{
-    RF_AccessStripeMap_t *head = NULL;
-
-    RF_FREELIST_GET_N(rf_asm_freelist, head, next, (RF_AccessStripeMap_t *), count);
-    return (head);
-}
-
-/* Give a single ASM back to its free list. */
-void
-rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p)
-{
-    RF_FREELIST_FREE(rf_asm_freelist, p, next);
-}
-
-/*
- * Return a `next`-linked run of `count` ASMs starting at l_start to the free
- * list in one RF_FREELIST_FREE_N request. l_end is not used here.
- */
-static void
-rf_FreeASMList(RF_AccessStripeMap_t *l_start, RF_AccessStripeMap_t *l_end, int count)
-{
-    RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next, (RF_AccessStripeMap_t *), count);
-}
-
-/*
- * Release everything reachable from an access stripe map header: all PDAs
- * (Q, parity and data lists of every stripe), all ASMs, and the header.
- *
- * The PDA lists are first spliced into one long chain so that they can be
- * handed back with a single rf_FreePDAList call.
- */
-void
-rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr)
-{
-    RF_AccessStripeMap_t *sm, *lastASM = NULL;
-    RF_PhysDiskAddr_t *sub[3];
-    RF_PhysDiskAddr_t *walk, *tail;
-    RF_PhysDiskAddr_t *chainHead = NULL, *chainTail = NULL;
-    int     numPDA = 0, numASM = 0, verify, k;
-
-    for (sm = hdr->stripeMap; sm; sm = sm->next) {
-        /* the three per-stripe lists, in the order they are chained */
-        sub[0] = sm->qInfo;
-        sub[1] = sm->parityInfo;
-        sub[2] = sm->physInfo;
-
-        for (k = 0; k < 3; k++) {
-            /* hook this list onto the accumulated chain */
-            if (chainHead)
-                chainTail->next = sub[k];
-            else
-                chainHead = sub[k];
-
-            /* count its elements and locate its last one */
-            tail = NULL;
-            for (walk = sub[k]; walk; walk = walk->next) {
-                tail = walk;
-                numPDA++;
-            }
-            if (tail)
-                chainTail = tail;
-        }
-
-        lastASM = sm;
-        numASM++;
-    }
-
-    /* debug only: the spliced chain must contain exactly what was counted */
-    verify = 0;
-    for (walk = chainHead; walk; walk = walk->next)
-        verify++;
-    RF_ASSERT(verify == numPDA);
-
-    if (chainHead)
-        rf_FreePDAList(chainHead, chainTail, numPDA);
-    rf_FreeASMList(hdr->stripeMap, lastASM, numASM);
-    rf_FreeAccessStripeMapHeader(hdr);
-}
-/* We can't use the large write optimization if there are any failures in the stripe.
- * In the declustered layout, there is no way to immediately determine what disks
- * constitute a stripe, so we actually have to hunt through the stripe looking for failures.
- * The reason we map the parity instead of just using asm->parityInfo->col is because
- * the latter may have been already redirected to a spare drive, which would
- * mess up the computation of the stripe offset.
- *
- * ASSUMES AT MOST ONE FAILURE IN THE STRIPE.
- */
-/* Returns 0 when the stripe has no blocking failure and 1 when a dead
- * non-parity disk rules out the large-write optimization. */
-int
-rf_CheckStripeForFailures(raidPtr, asmap)
- RF_Raid_t *raidPtr;
- RF_AccessStripeMap_t *asmap;
-{
- RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i;
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_StripeCount_t stripeOffset;
- int numFailures;
- RF_RaidAddr_t sosAddr;
- RF_SectorNum_t diskOffset, poffset;
- RF_RowCol_t testrow;
-
- /* quick out in the fault-free case. */
- RF_LOCK_MUTEX(raidPtr->mutex);
- numFailures = raidPtr->numFailures;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- if (numFailures == 0)
- return (0);
-
- sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
- /* NOTE(review): row is assigned here but never read afterwards */
- row = asmap->physInfo->row;
- (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &testrow);
- (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */
-
- /* this need not be true if we've redirected the access to a spare in
- * another row RF_ASSERT(row == testrow); */
- /* stripeOffset counts the non-parity columns visited so far, i.e. the
- * index of the current unit relative to the start of the stripe */
- stripeOffset = 0;
- for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) {
- if (diskids[i] != pcol) {
- if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) {
- /* dead disk and no reconstruction running: failure stands */
- if (raidPtr->status[testrow] != rf_rs_reconstructing)
- return (1);
- RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]);
- /* unremapped mapping (remap == 0) of this unit, to obtain
- * its offset on the failed disk */
- layoutPtr->map->MapSector(raidPtr,
- sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit,
- &trow, &tcol, &diskOffset, 0);
- RF_ASSERT((trow == testrow) && (tcol == diskids[i]));
- /* not yet reconstructed: still counts as a failure */
- if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset))
- return (1);
- /* already reconstructed: let large-write creation redirect */
- asmap->flags |= RF_ASM_REDIR_LARGE_WRITE;
- return (0);
- }
- stripeOffset++;
- }
- }
- return (0);
-}
-/*
- * Return the number of failed data units in the stripe containing
- * asmap->raidAddress.
- *
- * Every data column of the stripe is mapped without spare redirection
- * (remap == 0) and counted when the disk it lands on is dead. Returns 0
- * straight away when the array currently records no failures.
- */
-
-int
-rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
-{
-    RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
-    RF_RowCol_t trow, tcol, i;
-    RF_SectorNum_t diskOffset;
-    RF_RaidAddr_t sosAddr;
-    int     numFailures;
-
-    /* quick out in the fault-free case. */
-    RF_LOCK_MUTEX(raidPtr->mutex);
-    numFailures = raidPtr->numFailures;
-    RF_UNLOCK_MUTEX(raidPtr->mutex);
-    if (numFailures == 0)
-        return (0);
-    numFailures = 0;    /* reused below as the per-stripe count */
-
-    /* start-of-stripe address; data unit i begins i stripe units later */
-    sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
-    for (i = 0; i < layoutPtr->numDataCol; i++) {
-        (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit,
-            &trow, &tcol, &diskOffset, 0);
-        if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status))
-            numFailures++;
-    }
-
-    return numFailures;
-}
-
-
-/*****************************************************************************************
- *
- * debug routines
- *
- ****************************************************************************************/
-
-/* Print an access stripe map without buffer pointers (prbuf == 0). */
-void
-rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h)
-{
-    rf_PrintFullAccessStripeMap(asm_h, 0);
-}
-
-/*
- * Debug dump of an access stripe map.
- *
- * For every stripe under asm_h this prints the stripe ID, sector count and
- * failure counts, the parity PDA(s), each data PDA, and finally a summary of
- * that stripe's failure state. When prbuf is non-zero the buffer pointer of
- * each PDA is printed as well.
- *
- * The failure summary is taken from the stripe currently being printed; it
- * used to be read from the first stripe in the list on every iteration.
- */
-void
-rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h, int prbuf)
-{
-    int     i, numFailed;
-    RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
-    RF_PhysDiskAddr_t *p;
-
-    printf("%d stripes total\n", (int) asm_h->numStripes);
-    for (; asmap; asmap = asmap->next) {
-        printf("Stripe %d (%d sectors), failures: %d data, %d parity: ",
-            (int) asmap->stripeID,
-            (int) asmap->totalSectorsAccessed,
-            (int) asmap->numDataFailed,
-            (int) asmap->numParityFailed);
-        if (asmap->parityInfo) {
-            printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col,
-                (int) asmap->parityInfo->startSector,
-                (int) (asmap->parityInfo->startSector +
-                    asmap->parityInfo->numSector - 1));
-            if (prbuf)
-                printf(" b0x%lx", (unsigned long) asmap->parityInfo->bufPtr);
-            /* a second parity PDA exists when rf_ASMParityAdjust split
-             * the parity access in two */
-            if (asmap->parityInfo->next) {
-                printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row,
-                    asmap->parityInfo->next->col,
-                    (int) asmap->parityInfo->next->startSector,
-                    (int) (asmap->parityInfo->next->startSector +
-                        asmap->parityInfo->next->numSector - 1));
-                if (prbuf)
-                    printf(" b0x%lx", (unsigned long) asmap->parityInfo->next->bufPtr);
-                RF_ASSERT(asmap->parityInfo->next->next == NULL);
-            }
-            printf("]\n\t");
-        }
-        for (i = 0, p = asmap->physInfo; p; p = p->next, i++) {
-            printf("SU r%d c%d s%d-%d ", p->row, p->col, (int) p->startSector,
-                (int) (p->startSector + p->numSector - 1));
-            if (prbuf)
-                printf("b0x%lx ", (unsigned long) p->bufPtr);
-            /* line break after every even-numbered entry except the first */
-            if (i && !(i & 1))
-                printf("\n\t");
-        }
-        printf("\n");
-
-        /* failure summary for THIS stripe */
-        numFailed = asmap->numDataFailed + asmap->numParityFailed;
-        p = asmap->failedPDAs[0];
-        if (numFailed > 1)
-            printf("[multiple failures]\n");
-        else if (numFailed > 0 && p != NULL)
-            printf("\t[Failed PDA: r%d c%d s%d-%d]\n", p->row, p->col,
-                (int) p->startSector, (int) (p->startSector + p->numSector - 1));
-    }
-}
-
-/*
- * Debug helper: list the stripe-unit boundaries from the start of the stripe
- * containing raidAddr up to the end of the access, then show where raidAddr
- * falls inside its stripe unit.
- */
-void
-rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
-    RF_SectorCount_t numBlocks)
-{
-    RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
-    RF_RaidAddr_t ra;
-    int     suOffset;
-
-    printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t");
-    ra = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
-    while (ra <= raidAddr + numBlocks) {
-        printf("%d (0x%x), ", (int) ra, (int) ra);
-        ra += layoutPtr->sectorsPerStripeUnit;
-    }
-    printf("\n");
-
-    suOffset = (int) (raidAddr % layoutPtr->sectorsPerStripeUnit);
-    printf("Offset into stripe unit: %d (0x%x)\n", suOffset, suOffset);
-}
-/*
- given a parity descriptor and the starting address within a stripe,
- range restrict the parity descriptor to touch only the correct stuff.
-*/
-/*
- * toAdjust              parity (or Q) PDA to narrow; covers the whole unit on entry
- * startAddrWithinStripe RAID address where the access begins within this stripe
- * endAddress            RAID address passed by rf_MapAccess as the end of the access
- * layoutPtr             supplies sectorsPerStripeUnit
- * asm_p                 stripe map whose data PDAs have already been filled in
- *
- * If neither case below applies, toAdjust is left untouched.
- */
-void
-rf_ASMParityAdjust(
- RF_PhysDiskAddr_t * toAdjust,
- RF_StripeNum_t startAddrWithinStripe,
- RF_SectorNum_t endAddress,
- RF_RaidLayout_t * layoutPtr,
- RF_AccessStripeMap_t * asm_p)
-{
- RF_PhysDiskAddr_t *new_pda;
-
- /* when we're accessing only a portion of one stripe unit, we want the
- * parity descriptor to identify only the chunk of parity associated
- * with the data. When the access spans exactly one stripe unit
- * boundary and is less than a stripe unit in size, it uses two
- * disjoint regions of the parity unit. When an access spans more
- * than one stripe unit boundary, it uses all of the parity unit.
- *
- * To better handle the case where stripe units are small, we may
- * eventually want to change the 2nd case so that if the SU size is
- * below some threshold, we just read/write the whole thing instead of
- * breaking it up into two accesses. */
- if (asm_p->numStripeUnitsAccessed == 1) {
- /* x = offset of the access within its stripe unit */
- int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);
- toAdjust->startSector += x;
- toAdjust->raidAddress += x;
- toAdjust->numSector = asm_p->physInfo->numSector;
- RF_ASSERT(toAdjust->numSector != 0);
- } else
- if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) {
- int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);
-
- /* create a second pda and copy the parity map info
- * into it */
- RF_ASSERT(toAdjust->next == NULL);
- new_pda = toAdjust->next = rf_AllocPhysDiskAddr();
- *new_pda = *toAdjust; /* structure assignment */
- new_pda->next = NULL;
-
- /* adjust the start sector & number of blocks for the
- * first parity pda */
- toAdjust->startSector += x;
- toAdjust->raidAddress += x;
- toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe;
- RF_ASSERT(toAdjust->numSector != 0);
-
- /* adjust the second pda: it keeps the copied startSector (the
- * start of the parity unit) and only its length is trimmed */
- new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress);
- /* new_pda->raidAddress =
- * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
- * toAdjust->raidAddress); */
- RF_ASSERT(new_pda->numSector != 0);
- }
-}
-/*
- Check if a disk has been spared or failed. If spared,
- redirect the I/O.
- If it has been failed, record it in the asm pointer.
- Fourth arg is whether data or parity.
-*/
-void
-rf_ASMCheckStatus(
- RF_Raid_t * raidPtr,
- RF_PhysDiskAddr_t * pda_p,
- RF_AccessStripeMap_t * asm_p,
- RF_RaidDisk_t ** disks,
- int parity)
-{
- RF_DiskStatus_t dstatus;
- RF_RowCol_t frow, fcol;
-
- dstatus = disks[pda_p->row][pda_p->col].status;
-
- if (dstatus == rf_ds_spared) {
- /* if the disk has been spared, redirect access to the spare */
- frow = pda_p->row;
- fcol = pda_p->col;
- pda_p->row = disks[frow][fcol].spareRow;
- pda_p->col = disks[frow][fcol].spareCol;
- } else
- if (dstatus == rf_ds_dist_spared) {
- /* ditto if disk has been spared to dist spare space */
- RF_RowCol_t or = pda_p->row, oc = pda_p->col;
- RF_SectorNum_t oo = pda_p->startSector;
-
- if (pda_p->type == RF_PDA_TYPE_DATA)
- raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP);
- else
- raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP);
-
- if (rf_mapDebug) {
- printf("Redirected r %d c %d o %d -> r%d c %d o %d\n", or, oc, (int) oo,
- pda_p->row, pda_p->col, (int) pda_p->startSector);
- }
- } else
- if (RF_DEAD_DISK(dstatus)) {
- /* if the disk is inaccessible, mark the
- * failure */
- if (parity)
- asm_p->numParityFailed++;
- else {
- asm_p->numDataFailed++;
-#if 0
- /* XXX Do we really want this spewing
- * out on the console? GO */
- printf("DATA_FAILED!\n");
-#endif
- }
- asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p;
- asm_p->numFailedPDAs++;
-#if 0
- switch (asm_p->numParityFailed + asm_p->numDataFailed) {
- case 1:
- asm_p->failedPDAs[0] = pda_p;
- break;
- case 2:
- asm_p->failedPDAs[1] = pda_p;
- default:
- break;
- }
-#endif
- }
- /* the redirected access should never span a stripe unit boundary */
- RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress) ==
- rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress + pda_p->numSector - 1));
- RF_ASSERT(pda_p->col != -1);
-}
diff --git a/sys/dev/raidframe/rf_map.h b/sys/dev/raidframe/rf_map.h
deleted file mode 100644
index d7c6d19..0000000
--- a/sys/dev/raidframe/rf_map.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_map.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_map.h */
-
-#ifndef _RF__RF_MAP_H_
-#define _RF__RF_MAP_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_raid.h>
-
-/* mapping structure allocation and free routines */
-RF_AccessStripeMapHeader_t *
-rf_MapAccess(RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
- caddr_t buffer, int remap);
-
-void
-rf_MarkFailuresInASMList(RF_Raid_t * raidPtr,
- RF_AccessStripeMapHeader_t * asm_h);
-
-RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t * asmap);
-
-RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t * pda);
-
-int rf_ConfigureMapModule(RF_ShutdownList_t ** listp);
-
-RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader(void);
-
-void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t * p);
-
-RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr(void);
-
-RF_PhysDiskAddr_t *rf_AllocPDAList(int count);
-
-void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t * p);
-
-RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent(void);
-
-RF_AccessStripeMap_t *rf_AllocASMList(int count);
-
-void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t * p);
-
-void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t * hdr);
-
-int rf_CheckStripeForFailures(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap);
-
-int rf_NumFailedDataUnitsInStripe(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap);
-
-void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h);
-
-void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h, int prbuf);
-
-void
-rf_PrintRaidAddressInfo(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr,
- RF_SectorCount_t numBlocks);
-
-void
-rf_ASMParityAdjust(RF_PhysDiskAddr_t * toAdjust,
- RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress,
- RF_RaidLayout_t * layoutPtr, RF_AccessStripeMap_t * asm_p);
-
-void
-rf_ASMCheckStatus(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda_p,
- RF_AccessStripeMap_t * asm_p, RF_RaidDisk_t ** disks, int parity);
-
-#endif /* !_RF__RF_MAP_H_ */
diff --git a/sys/dev/raidframe/rf_mcpair.c b/sys/dev/raidframe/rf_mcpair.c
deleted file mode 100644
index 7b327ac..0000000
--- a/sys/dev/raidframe/rf_mcpair.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/* $NetBSD: rf_mcpair.c,v 1.4 2000/09/11 02:23:14 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_mcpair.c
- * an mcpair is a structure containing a mutex and a condition variable.
- * it's used to block the current thread until some event occurs.
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-#include <sys/proc.h>
-
-static RF_FreeList_t *rf_mcpair_freelist;
-
-#define RF_MAX_FREE_MCPAIR 128
-#define RF_MCPAIR_INC 16
-#define RF_MCPAIR_INITIAL 24
-
-static int init_mcpair(RF_MCPair_t *);
-static void clean_mcpair(RF_MCPair_t *);
-static void rf_ShutdownMCPair(void *);
-
-
-
-static int
-init_mcpair(t)
- RF_MCPair_t *t;
-{
- int rc;
-
- rc = rf_mutex_init(&t->mutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- return (rc);
- }
- rc = rf_cond_init(&t->cond);
- if (rc) {
- RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- rf_mutex_destroy(&t->mutex);
- return (rc);
- }
- return (0);
-}
-
-static void
-clean_mcpair(t)
- RF_MCPair_t *t;
-{
- rf_mutex_destroy(&t->mutex);
- rf_cond_destroy(&t->cond);
-}
-
-static void
-rf_ShutdownMCPair(ignored)
- void *ignored;
-{
- RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist, next, (RF_MCPair_t *), clean_mcpair);
-}
-
-int
-rf_ConfigureMCPair(listp)
- RF_ShutdownList_t **listp;
-{
- int rc;
-
- RF_FREELIST_CREATE(rf_mcpair_freelist, RF_MAX_FREE_MCPAIR,
- RF_MCPAIR_INC, sizeof(RF_MCPair_t));
- rc = rf_ShutdownCreate(listp, rf_ShutdownMCPair, NULL);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
- __FILE__, __LINE__, rc);
- rf_ShutdownMCPair(NULL);
- return (rc);
- }
- RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL, next,
- (RF_MCPair_t *), init_mcpair);
- return (0);
-}
-
-RF_MCPair_t *
-rf_AllocMCPair()
-{
- RF_MCPair_t *t;
-
- RF_FREELIST_GET_INIT(rf_mcpair_freelist, t, next, (RF_MCPair_t *), init_mcpair);
- if (t) {
- t->flag = 0;
- t->next = NULL;
- }
- return (t);
-}
-
-void
-rf_FreeMCPair(t)
- RF_MCPair_t *t;
-{
- RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist, t, next, clean_mcpair);
-}
-/* the callback function used to wake you up when you use an mcpair to wait for something */
-void
-rf_MCPairWakeupFunc(mcpair)
- RF_MCPair_t *mcpair;
-{
- RF_LOCK_MUTEX(mcpair->mutex);
- mcpair->flag = 1;
- wakeup(&(mcpair->cond));
- RF_UNLOCK_MUTEX(mcpair->mutex);
-}
diff --git a/sys/dev/raidframe/rf_mcpair.h b/sys/dev/raidframe/rf_mcpair.h
deleted file mode 100644
index d43c728..0000000
--- a/sys/dev/raidframe/rf_mcpair.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_mcpair.h,v 1.6 2000/09/21 01:45:46 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_mcpair.h
- * see comments in rf_mcpair.c
- */
-
-#ifndef _RF__RF_MCPAIR_H_
-#define _RF__RF_MCPAIR_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-
-struct RF_MCPair_s {
- RF_DECLARE_MUTEX(mutex)
- RF_DECLARE_COND(cond)
- int flag;
- RF_MCPair_t *next;
-};
-#define RF_WAIT_MCPAIR(_mcp) \
- RF_LTSLEEP(&((_mcp)->cond), PRIBIO, "mcpair", 0, &((_mcp)->mutex))
-
-int rf_ConfigureMCPair(RF_ShutdownList_t ** listp);
-RF_MCPair_t *rf_AllocMCPair(void);
-void rf_FreeMCPair(RF_MCPair_t * t);
-void rf_MCPairWakeupFunc(RF_MCPair_t * t);
-
-#endif /* !_RF__RF_MCPAIR_H_ */
diff --git a/sys/dev/raidframe/rf_memchunk.c b/sys/dev/raidframe/rf_memchunk.c
deleted file mode 100644
index b6e8bd9..0000000
--- a/sys/dev/raidframe/rf_memchunk.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/* $NetBSD: rf_memchunk.c,v 1.4 1999/08/13 03:41:56 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*********************************************************************************
- * rf_memchunk.c
- *
- * experimental code. I've found that the malloc and free calls in the DAG
- * creation code are very expensive. Since for any given workload the DAGs
- * created for different accesses are likely to be similar to each other, the
- * amount of memory used for any given DAG data structure is likely to be one
- * of a small number of values. For example, in UNIX, all reads and writes will
- * be less than 8k and will not span stripe unit boundaries. Thus in the absence
- * of failure, the only DAGs that will ever get created are single-node reads
- * and single-stripe-unit atomic read-modify-writes. So, I'm very likely to
- * be continually asking for chunks of memory equal to the sizes of these two
- * DAGs.
- *
- * This leads to the idea of holding on to these chunks of memory when the DAG is
- * freed and then, when a new DAG is created, trying to find such a chunk before
- * calling malloc.
- *
- * the "chunk list" is a list of lists. Each header node contains a size value
- * and a pointer to a list of chunk descriptors, each of which holds a pointer
- * to a chunk of memory of the indicated size.
- *
- * There is currently no way to purge memory out of the chunk list. My
- * initial thought on this is to have a low-priority thread that wakes up every
- * 1 or 2 seconds, purges all the chunks with low reuse counts, and sets all
- * the reuse counts to zero.
- *
- * This whole idea may be bad, since malloc may be able to do this more efficiently.
- * It's worth a try, though, and it can be turned off by setting useMemChunks to 0.
- *
- ********************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_memchunk.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-typedef struct RF_ChunkHdr_s RF_ChunkHdr_t;
-struct RF_ChunkHdr_s {
- int size;
- RF_ChunkDesc_t *list;
- RF_ChunkHdr_t *next;
-};
-
-static RF_ChunkHdr_t *chunklist, *chunk_hdr_free_list;
-static RF_ChunkDesc_t *chunk_desc_free_list;
-RF_DECLARE_STATIC_MUTEX(chunkmutex)
- static void rf_ShutdownMemChunk(void *);
- static RF_ChunkDesc_t *NewMemChunk(int, char *);
-
-
- static void rf_ShutdownMemChunk(ignored)
- void *ignored;
-{
- RF_ChunkDesc_t *pt, *p;
- RF_ChunkHdr_t *hdr, *ht;
-
- if (rf_memChunkDebug)
- printf("Chunklist:\n");
- for (hdr = chunklist; hdr;) {
- for (p = hdr->list; p;) {
- if (rf_memChunkDebug)
- printf("Size %d reuse count %d\n", p->size, p->reuse_count);
- pt = p;
- p = p->next;
- RF_Free(pt->buf, pt->size);
- RF_Free(pt, sizeof(*pt));
- }
- ht = hdr;
- hdr = hdr->next;
- RF_Free(ht, sizeof(*ht));
- }
-
- rf_mutex_destroy(&chunkmutex);
-}
-
-int
-rf_ConfigureMemChunk(listp)
- RF_ShutdownList_t **listp;
-{
- int rc;
-
- chunklist = NULL;
- chunk_hdr_free_list = NULL;
- chunk_desc_free_list = NULL;
- rc = rf_mutex_init(&chunkmutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- }
- rc = rf_ShutdownCreate(listp, rf_ShutdownMemChunk, NULL);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- rf_mutex_destroy(&chunkmutex);
- }
- return (rc);
-}
-/* called to get a chunk descriptor for a newly-allocated chunk of memory
- * MUTEX MUST BE LOCKED
- *
- * free list is not currently used
- */
-static RF_ChunkDesc_t *
-NewMemChunk(size, buf)
- int size;
- char *buf;
-{
- RF_ChunkDesc_t *p;
-
- if (chunk_desc_free_list) {
- p = chunk_desc_free_list;
- chunk_desc_free_list = p->next;
- } else
- RF_Malloc(p, sizeof(RF_ChunkDesc_t), (RF_ChunkDesc_t *));
- p->size = size;
- p->buf = buf;
- p->next = NULL;
- p->reuse_count = 0;
- return (p);
-}
-/* looks for a chunk of memory of acceptable size. If none, allocates one and returns
- * a chunk descriptor for it, but does not install anything in the list. This is done
- * when the chunk is released.
- */
-RF_ChunkDesc_t *
-rf_GetMemChunk(size)
- int size;
-{
- RF_ChunkHdr_t *hdr = chunklist;
- RF_ChunkDesc_t *p = NULL;
- char *buf;
-
- RF_LOCK_MUTEX(chunkmutex);
- for (hdr = chunklist; hdr; hdr = hdr->next)
- if (hdr->size >= size) {
- p = hdr->list;
- if (p) {
- hdr->list = p->next;
- p->next = NULL;
- p->reuse_count++;
- }
- break;
- }
- if (!p) {
- RF_Malloc(buf, size, (char *));
- p = NewMemChunk(size, buf);
- }
- RF_UNLOCK_MUTEX(chunkmutex);
- (void) bzero(p->buf, size);
- return (p);
-}
-
-void
-rf_ReleaseMemChunk(chunk)
- RF_ChunkDesc_t *chunk;
-{
- RF_ChunkHdr_t *hdr, *ht = NULL, *new;
-
- RF_LOCK_MUTEX(chunkmutex);
- for (hdr = chunklist; hdr && hdr->size < chunk->size; ht = hdr, hdr = hdr->next);
- if (hdr && hdr->size == chunk->size) {
- chunk->next = hdr->list;
- hdr->list = chunk;
- } else {
- RF_Malloc(new, sizeof(RF_ChunkHdr_t), (RF_ChunkHdr_t *));
- new->size = chunk->size;
- new->list = chunk;
- chunk->next = NULL;
- if (ht) {
- new->next = ht->next;
- ht->next = new;
- } else {
- new->next = hdr;
- chunklist = new;
- }
- }
- RF_UNLOCK_MUTEX(chunkmutex);
-}
diff --git a/sys/dev/raidframe/rf_memchunk.h b/sys/dev/raidframe/rf_memchunk.h
deleted file mode 100644
index 5806d20..0000000
--- a/sys/dev/raidframe/rf_memchunk.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_memchunk.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* header file for rf_memchunk.c. See comments there */
-
-#ifndef _RF__RF_MEMCHUNK_H_
-#define _RF__RF_MEMCHUNK_H_
-
-#include <dev/raidframe/rf_types.h>
-
-struct RF_ChunkDesc_s {
- int size;
- int reuse_count;
- char *buf;
- RF_ChunkDesc_t *next;
-};
-
-int rf_ConfigureMemChunk(RF_ShutdownList_t ** listp);
-RF_ChunkDesc_t *rf_GetMemChunk(int size);
-void rf_ReleaseMemChunk(RF_ChunkDesc_t * chunk);
-
-#endif /* !_RF__RF_MEMCHUNK_H_ */
diff --git a/sys/dev/raidframe/rf_nwayxor.c b/sys/dev/raidframe/rf_nwayxor.c
deleted file mode 100644
index 170db6a..0000000
--- a/sys/dev/raidframe/rf_nwayxor.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/* $NetBSD: rf_nwayxor.c,v 1.4 2000/03/30 12:45:41 augustss Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/************************************************************
- *
- * nwayxor.c -- code to do N-way xors for reconstruction
- *
- * nWayXorN xors N input buffers into the destination buffer.
- * adapted from danner's longword_bxor code.
- *
- ************************************************************/
-
-#include <dev/raidframe/rf_nwayxor.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-static int callcount[10];
-static void rf_ShutdownNWayXor(void *);
-
-static void
-rf_ShutdownNWayXor(ignored)
- void *ignored;
-{
- int i;
-
- if (rf_showXorCallCounts == 0)
- return;
- printf("Call counts for n-way xor routines: ");
- for (i = 0; i < 10; i++)
- printf("%d ", callcount[i]);
- printf("\n");
-}
-
-int
-rf_ConfigureNWayXor(listp)
- RF_ShutdownList_t **listp;
-{
- int i, rc;
-
- for (i = 0; i < 10; i++)
- callcount[i] = 0;
- rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
- return (rc);
-}
-
-void
-rf_nWayXor1(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *dest = (unsigned long *) dest_rb->buffer;
- unsigned long *end = src + len;
- unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
-
- callcount[1]++;
- while (len >= 4) {
- d0 = dest[0];
- d1 = dest[1];
- d2 = dest[2];
- d3 = dest[3];
- s0 = src[0];
- s1 = src[1];
- s2 = src[2];
- s3 = src[3];
- dest[0] = d0 ^ s0;
- dest[1] = d1 ^ s1;
- dest[2] = d2 ^ s2;
- dest[3] = d3 ^ s3;
- src += 4;
- dest += 4;
- len -= 4;
- }
- while (src < end) {
- *dest++ ^= *src++;
- }
-}
-
-void
-rf_nWayXor2(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *a = dst;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[2]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ = *a++ ^ *b++ ^ *c++;
- len--;
- }
- while (len > 4) {
- a0 = a[0];
- len -= 4;
-
- a1 = a[1];
- a2 = a[2];
-
- a3 = a[3];
- a += 4;
-
- b0 = b[0];
- b1 = b[1];
-
- b2 = b[2];
- b3 = b[3];
- /* start dual issue */
- a0 ^= b0;
- b0 = c[0];
-
- b += 4;
- a1 ^= b1;
-
- a2 ^= b2;
- a3 ^= b3;
-
- b1 = c[1];
- a0 ^= b0;
-
- b2 = c[2];
- a1 ^= b1;
-
- b3 = c[3];
- a2 ^= b2;
-
- dst[0] = a0;
- a3 ^= b3;
- dst[1] = a1;
- c += 4;
- dst[2] = a2;
- dst[3] = a3;
- dst += 4;
- }
- while (len) {
- *dst++ = *a++ ^ *b++ ^ *c++;
- len--;
- }
-}
-/* note that first arg is not incremented but 2nd arg is */
-#define LOAD_FIRST(_dst,_b) \
- a0 = _dst[0]; len -= 4; \
- a1 = _dst[1]; \
- a2 = _dst[2]; \
- a3 = _dst[3]; \
- b0 = _b[0]; \
- b1 = _b[1]; \
- b2 = _b[2]; \
- b3 = _b[3]; _b += 4;
-
-/* note: arg is incremented */
-#define XOR_AND_LOAD_NEXT(_n) \
- a0 ^= b0; b0 = _n[0]; \
- a1 ^= b1; b1 = _n[1]; \
- a2 ^= b2; b2 = _n[2]; \
- a3 ^= b3; b3 = _n[3]; \
- _n += 4;
-
-/* arg is incremented */
-#define XOR_AND_STORE(_dst) \
- a0 ^= b0; _dst[0] = a0; \
- a1 ^= b1; _dst[1] = a1; \
- a2 ^= b2; _dst[2] = a2; \
- a3 ^= b3; _dst[3] = a3; \
- _dst += 4;
-
-
-void
-rf_nWayXor3(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[3]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ ^= *b++ ^ *c++ ^ *d++;
- len--;
- }
- while (len > 4) {
- LOAD_FIRST(dst, b);
- XOR_AND_LOAD_NEXT(c);
- XOR_AND_LOAD_NEXT(d);
- XOR_AND_STORE(dst);
- }
- while (len) {
- *dst++ ^= *b++ ^ *c++ ^ *d++;
- len--;
- }
-}
-
-void
-rf_nWayXor4(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
- unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[4]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
- len--;
- }
- while (len > 4) {
- LOAD_FIRST(dst, b);
- XOR_AND_LOAD_NEXT(c);
- XOR_AND_LOAD_NEXT(d);
- XOR_AND_LOAD_NEXT(e);
- XOR_AND_STORE(dst);
- }
- while (len) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
- len--;
- }
-}
-
-void
-rf_nWayXor5(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
- unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
- unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[5]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
- len--;
- }
- while (len > 4) {
- LOAD_FIRST(dst, b);
- XOR_AND_LOAD_NEXT(c);
- XOR_AND_LOAD_NEXT(d);
- XOR_AND_LOAD_NEXT(e);
- XOR_AND_LOAD_NEXT(f);
- XOR_AND_STORE(dst);
- }
- while (len) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
- len--;
- }
-}
-
-void
-rf_nWayXor6(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
- unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
- unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
- unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[6]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
- len--;
- }
- while (len > 4) {
- LOAD_FIRST(dst, b);
- XOR_AND_LOAD_NEXT(c);
- XOR_AND_LOAD_NEXT(d);
- XOR_AND_LOAD_NEXT(e);
- XOR_AND_LOAD_NEXT(f);
- XOR_AND_LOAD_NEXT(g);
- XOR_AND_STORE(dst);
- }
- while (len) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
- len--;
- }
-}
-
-void
-rf_nWayXor7(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
- unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
- unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
- unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
- unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[7]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
- len--;
- }
- while (len > 4) {
- LOAD_FIRST(dst, b);
- XOR_AND_LOAD_NEXT(c);
- XOR_AND_LOAD_NEXT(d);
- XOR_AND_LOAD_NEXT(e);
- XOR_AND_LOAD_NEXT(f);
- XOR_AND_LOAD_NEXT(g);
- XOR_AND_LOAD_NEXT(h);
- XOR_AND_STORE(dst);
- }
- while (len) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
- len--;
- }
-}
-
-void
-rf_nWayXor8(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
- unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
- unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
- unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
- unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
- unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[8]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
- len--;
- }
- while (len > 4) {
- LOAD_FIRST(dst, b);
- XOR_AND_LOAD_NEXT(c);
- XOR_AND_LOAD_NEXT(d);
- XOR_AND_LOAD_NEXT(e);
- XOR_AND_LOAD_NEXT(f);
- XOR_AND_LOAD_NEXT(g);
- XOR_AND_LOAD_NEXT(h);
- XOR_AND_LOAD_NEXT(i);
- XOR_AND_STORE(dst);
- }
- while (len) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
- len--;
- }
-}
-
-
-void
-rf_nWayXor9(src_rbs, dest_rb, len)
- RF_ReconBuffer_t **src_rbs;
- RF_ReconBuffer_t *dest_rb;
- int len;
-{
- unsigned long *dst = (unsigned long *) dest_rb->buffer;
- unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
- unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
- unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
- unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
- unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
- unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
- unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
- unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
- unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
- unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
-
- callcount[9]++;
- /* align dest to cache line */
- while ((((unsigned long) dst) & 0x1f)) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
- len--;
- }
- while (len > 4) {
- LOAD_FIRST(dst, b);
- XOR_AND_LOAD_NEXT(c);
- XOR_AND_LOAD_NEXT(d);
- XOR_AND_LOAD_NEXT(e);
- XOR_AND_LOAD_NEXT(f);
- XOR_AND_LOAD_NEXT(g);
- XOR_AND_LOAD_NEXT(h);
- XOR_AND_LOAD_NEXT(i);
- XOR_AND_LOAD_NEXT(j);
- XOR_AND_STORE(dst);
- }
- while (len) {
- *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
- len--;
- }
-}
diff --git a/sys/dev/raidframe/rf_nwayxor.h b/sys/dev/raidframe/rf_nwayxor.h
deleted file mode 100644
index 1460d9b..0000000
--- a/sys/dev/raidframe/rf_nwayxor.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_nwayxor.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */
-/*
- * rf_nwayxor.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/*
- * rf_nwayxor.h -- types and prototypes for nwayxor module
- */
-
-#ifndef _RF__RF_NWAYXOR_H_
-#define _RF__RF_NWAYXOR_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_reconstruct.h>
-
-int rf_ConfigureNWayXor(RF_ShutdownList_t ** listp);
-void rf_nWayXor1(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor2(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor3(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor4(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor5(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor6(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor7(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor8(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-void rf_nWayXor9(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len);
-
-#endif /* !_RF__RF_NWAYXOR_H_ */
diff --git a/sys/dev/raidframe/rf_options.c b/sys/dev/raidframe/rf_options.c
deleted file mode 100644
index 107c509..0000000
--- a/sys/dev/raidframe/rf_options.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/* $NetBSD: rf_options.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * rf_options.c
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_options.h>
-
-#ifdef RF_DBG_OPTION
-#undef RF_DBG_OPTION
-#endif /* RF_DBG_OPTION */
-
-#ifdef __STDC__
-#define RF_DBG_OPTION(_option_,_defval_) long rf_##_option_ = _defval_;
-#else /* __STDC__ */
-#define RF_DBG_OPTION(_option_,_defval_) long rf_/**/_option_ = _defval_;
-#endif /* __STDC__ */
-
-#include <dev/raidframe/rf_optnames.h>
-
-#undef RF_DBG_OPTION
-
-#ifdef __STDC__
-#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_##_option_ },
-#else /* __STDC__ */
-#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_/**/_option_ },
-#endif /* __STDC__ */
-
-RF_DebugName_t rf_debugNames[] = { /* one { RF_STRING(option), &rf_option } entry per RF_DBG_OPTION line of rf_optnames.h */
-#include <dev/raidframe/rf_optnames.h>
- {NULL, NULL} /* end-of-table sentinel */
-};
-#undef RF_DBG_OPTION
-
-#ifdef __STDC__
-#define RF_DBG_OPTION(_option_,_defval_) rf_##_option_ = _defval_ ;
-#else /* __STDC__ */
-#define RF_DBG_OPTION(_option_,_defval_) rf_/**/_option_ = _defval_ ;
-#endif /* __STDC__ */
-
-void
-rf_ResetDebugOptions() /* assign every rf_<option> variable its default value again */
-{ /* RF_DBG_OPTION is defined just above as "rf_<option> = <default>;", so the include below expands to one assignment per option */
-#include <dev/raidframe/rf_optnames.h>
-}
diff --git a/sys/dev/raidframe/rf_options.h b/sys/dev/raidframe/rf_options.h
deleted file mode 100644
index 22b6341..0000000
--- a/sys/dev/raidframe/rf_options.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_options.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */
-/*
- * rf_options.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_OPTIONS_H_
-#define _RF__RF_OPTIONS_H_
-
-#define RF_DEFAULT_LOCK_TABLE_SIZE 256
-
-typedef struct RF_DebugNames_s { /* one entry of the rf_debugNames[] table declared below */
- char *name; /* option name */
- long *ptr; /* pointer to a long; presumably the matching rf_<option> variable -- confirm in rf_options.c */
-} RF_DebugName_t;
-
-extern RF_DebugName_t rf_debugNames[];
-
-#ifdef RF_DBG_OPTION
-#undef RF_DBG_OPTION
-#endif /* RF_DBG_OPTION */
-
-#ifdef __STDC__
-#define RF_DBG_OPTION(_option_,_defval_) extern long rf_##_option_;
-#else /* __STDC__ */
-#define RF_DBG_OPTION(_option_,_defval_) extern long rf_/**/_option_;
-#endif /* __STDC__ */
-#include <dev/raidframe/rf_optnames.h>
-
-void rf_ResetDebugOptions(void);
-
-#endif /* !_RF__RF_OPTIONS_H_ */
diff --git a/sys/dev/raidframe/rf_optnames.h b/sys/dev/raidframe/rf_optnames.h
deleted file mode 100644
index f04fbc1..0000000
--- a/sys/dev/raidframe/rf_optnames.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_optnames.h,v 1.6 1999/12/07 02:54:08 oster Exp $ */
-/*
- * rf_optnames.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Don't protect against multiple inclusion here- we actually want this.
- */
-
-RF_DBG_OPTION(accessDebug, 0)
-RF_DBG_OPTION(accessTraceBufSize, 0)
-RF_DBG_OPTION(cscanDebug, 0) /* debug CSCAN sorting */
-RF_DBG_OPTION(dagDebug, 0)
-RF_DBG_OPTION(debugPrintUseBuffer, 0)
-RF_DBG_OPTION(degDagDebug, 0)
-RF_DBG_OPTION(disableAsyncAccs, 0)
-RF_DBG_OPTION(diskDebug, 0)
-RF_DBG_OPTION(enableAtomicRMW, 0) /* this debug var enables locking of
- * the disk arm during small-write
- * operations. Setting this variable
- * to anything other than 0 will
- * result in deadlock. (wvcii) */
-RF_DBG_OPTION(engineDebug, 0)
-RF_DBG_OPTION(fifoDebug, 0) /* debug fifo queueing */
-RF_DBG_OPTION(floatingRbufDebug, 0)
-RF_DBG_OPTION(forceHeadSepLimit, -1)
-RF_DBG_OPTION(forceNumFloatingReconBufs, -1) /* wire down number of
- * extra recon buffers
- * to use */
-RF_DBG_OPTION(keepAccTotals, 0) /* turn on keep_acc_totals */
-RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE)
-RF_DBG_OPTION(mapDebug, 0)
-RF_DBG_OPTION(maxNumTraces, -1)
-
-RF_DBG_OPTION(memChunkDebug, 0)
-RF_DBG_OPTION(memDebug, 0)
-RF_DBG_OPTION(memDebugAddress, 0)
-RF_DBG_OPTION(numBufsToAccumulate, 1) /* number of buffers to
- * accumulate before doing XOR */
-RF_DBG_OPTION(prReconSched, 0)
-RF_DBG_OPTION(printDAGsDebug, 0)
-RF_DBG_OPTION(printStatesDebug, 0)
-RF_DBG_OPTION(protectedSectors, 64L) /* # of sectors at start of
- * disk to exclude from RAID
- * address space */
-RF_DBG_OPTION(pssDebug, 0)
-RF_DBG_OPTION(queueDebug, 0)
-RF_DBG_OPTION(quiesceDebug, 0)
-RF_DBG_OPTION(raidSectorOffset, 0) /* added to all incoming sectors to
- * debug alignment problems */
-RF_DBG_OPTION(reconDebug, 0)
-RF_DBG_OPTION(reconbufferDebug, 0)
-RF_DBG_OPTION(scanDebug, 0) /* debug SCAN sorting */
-RF_DBG_OPTION(showXorCallCounts, 0) /* show n-way Xor call counts */
-RF_DBG_OPTION(shutdownDebug, 0) /* show shutdown calls */
-RF_DBG_OPTION(sizePercentage, 100)
-RF_DBG_OPTION(sstfDebug, 0) /* turn on debugging info for sstf queueing */
-RF_DBG_OPTION(stripeLockDebug, 0)
-RF_DBG_OPTION(suppressLocksAndLargeWrites, 0)
-RF_DBG_OPTION(suppressTraceDelays, 0)
-RF_DBG_OPTION(useMemChunks, 1)
-RF_DBG_OPTION(validateDAGDebug, 0)
-RF_DBG_OPTION(validateVisitedDebug, 1) /* XXX turn to zero by
- * default? */
-RF_DBG_OPTION(verifyParityDebug, 0)
-RF_DBG_OPTION(debugKernelAccess, 0) /* DoAccessKernel debugging */
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-RF_DBG_OPTION(forceParityLogReint, 0)
-RF_DBG_OPTION(numParityRegions, 0) /* number of regions in the array */
-RF_DBG_OPTION(numReintegrationThreads, 1)
-RF_DBG_OPTION(parityLogDebug, 0) /* if nonzero, enables debugging of
- * parity logging */
-RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024) /* target bytes
- * available for in-core
- * logs */
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
-
diff --git a/sys/dev/raidframe/rf_paritylog.c b/sys/dev/raidframe/rf_paritylog.c
deleted file mode 100644
index 87c33e6..0000000
--- a/sys/dev/raidframe/rf_paritylog.c
+++ /dev/null
@@ -1,871 +0,0 @@
-/* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* Code for manipulating in-core parity logs
- *
- */
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-
-/*
- * Append-only log for recording parity "update" and "overwrite" records
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_paritylog.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_paritylogging.h>
-#include <dev/raidframe/rf_paritylogDiskMgr.h>
-
-/*
- * Return a struct for holding common parity log information.
- *
- * The struct is popped from the free list
- * (raidPtr->parityLogDiskQueue.freeCommonList) under
- * parityLogDiskQueue.mutex.  If the free list is empty a new struct is
- * obtained with RF_Malloc and its mutex is initialised.
- *
- * Returns the struct with ->next cleared, or NULL if a new struct could
- * not be set up (mutex initialisation failed, or RF_Malloc handed back
- * NULL).  NON-BLOCKING
- */
-static RF_CommonLogData_t *
-AllocParityLogCommonData(RF_Raid_t * raidPtr)
-{
-	RF_CommonLogData_t *common = NULL;
-	int     rc;
-
-	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-	if (raidPtr->parityLogDiskQueue.freeCommonList) {
-		common = raidPtr->parityLogDiskQueue.freeCommonList;
-		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
-		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-	} else {
-		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
-		if (common == NULL)
-			return (NULL);
-		rc = rf_mutex_init(&common->mutex, __FUNCTION__);
-		if (rc) {
-			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
-			    __LINE__, rc);
-			RF_Free(common, sizeof(RF_CommonLogData_t));
-			/*
-			 * Bail out here: falling through to the
-			 * common->next store below with common == NULL
-			 * would dereference a null pointer.
-			 */
-			return (NULL);
-		}
-	}
-	common->next = NULL;
-	return (common);
-}
-
-/*
- * Give one common parity-log struct back to the free list
- * (parityLogDiskQueue.freeCommonList) of the array recorded in
- * common->raidPtr.  The list head is updated under
- * parityLogDiskQueue.mutex.  NON-BLOCKING
- */
-static void
-FreeParityLogCommonData(RF_CommonLogData_t * common)
-{
-	RF_Raid_t *owner = common->raidPtr;
-
-	RF_LOCK_MUTEX(owner->parityLogDiskQueue.mutex);
-	/* push onto the head of the free list */
-	common->next = owner->parityLogDiskQueue.freeCommonList;
-	owner->parityLogDiskQueue.freeCommonList = common;
-	RF_UNLOCK_MUTEX(owner->parityLogDiskQueue.mutex);
-}
-
-/*
- * Return a struct for holding parity log information, with its next and
- * prev links cleared.
- *
- * The struct is taken from the head of the free list
- * (parityLogDiskQueue.freeDataList) under parityLogDiskQueue.mutex; when
- * that list is empty a new struct is created with RF_Malloc after the
- * mutex has been dropped.  NON-BLOCKING
- */
-static RF_ParityLogData_t *
-AllocParityLogData(RF_Raid_t * raidPtr)
-{
-	RF_ParityLogData_t *item;
-
-	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-	item = raidPtr->parityLogDiskQueue.freeDataList;
-	if (item != NULL) {
-		/* unlink the head of the free list */
-		raidPtr->parityLogDiskQueue.freeDataList = item->next;
-	}
-	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-
-	if (item == NULL) {
-		/* free list was empty: allocate a fresh struct */
-		RF_Malloc(item, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
-	}
-	item->next = NULL;
-	item->prev = NULL;
-	return (item);
-}
-
-
-/*
- * Return a linked list of parity-log data structs (chained through
- * ->next, starting at data) to the free list
- * (parityLogDiskQueue.freeDataList).  The owning array is taken from the
- * first element (data->common->raidPtr) and the whole list is moved
- * while holding parityLogDiskQueue.mutex.  NON-BLOCKING
- */
-static void
-FreeParityLogData(RF_ParityLogData_t * data)
-{
-	RF_ParityLogData_t *following;
-	RF_Raid_t *owner = data->common->raidPtr;
-
-	RF_LOCK_MUTEX(owner->parityLogDiskQueue.mutex);
-	for (; data != NULL; data = following) {
-		/* remember the rest of the list before relinking this item */
-		following = data->next;
-		data->next = owner->parityLogDiskQueue.freeDataList;
-		owner->parityLogDiskQueue.freeDataList = data;
-	}
-	RF_UNLOCK_MUTEX(owner->parityLogDiskQueue.mutex);
-}
-
-
-/*
- * Insert one in-core parity log item (data) at the head of the doubly
- * linked queue described by (*head, *tail).
- *
- * data must be unlinked (next == prev == NULL).  The queue is modified
- * under the parityLogDiskQueue.mutex of the array found through
- * data->common->raidPtr.  NON-BLOCKING
- */
-static void
-EnqueueParityLogData(
-	RF_ParityLogData_t * data,
-	RF_ParityLogData_t ** head,
-	RF_ParityLogData_t ** tail)
-{
-	RF_Raid_t *owner = data->common->raidPtr;
-
-	if (rf_parityLogDebug)
-		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
-	RF_ASSERT(data->prev == NULL);
-	RF_ASSERT(data->next == NULL);
-
-	RF_LOCK_MUTEX(owner->parityLogDiskQueue.mutex);
-	if (*head == NULL) {
-		/* queue is empty: data becomes both head and tail */
-		RF_ASSERT(*head == NULL);
-		RF_ASSERT(*tail == NULL);
-		*head = data;
-		*tail = data;
-	} else {
-		/* link data in front of the current head */
-		RF_ASSERT((*head)->prev == NULL);
-		RF_ASSERT((*tail)->next == NULL);
-		data->next = *head;
-		(*head)->prev = data;
-		*head = data;
-	}
-	RF_ASSERT((*head)->prev == NULL);
-	RF_ASSERT((*tail)->next == NULL);
-	RF_UNLOCK_MUTEX(owner->parityLogDiskQueue.mutex);
-}
-
-/*
- * Remove and return the item at the tail of the queue (*head, *tail),
- * or NULL if the queue is empty.  Because items are enqueued at the
- * head, taking from the tail preserves FIFO order.
- *
- * The returned item has its next and prev links cleared.  Unless
- * ignoreLocks is non-zero, the queue is manipulated under
- * raidPtr->parityLogDiskQueue.mutex.  NON-BLOCKING
- */
-static RF_ParityLogData_t *
-DequeueParityLogData(
-	RF_Raid_t * raidPtr,
-	RF_ParityLogData_t ** head,
-	RF_ParityLogData_t ** tail,
-	int ignoreLocks)
-{
-	RF_ParityLogData_t *item;
-
-	if (!ignoreLocks)
-		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-
-	item = *tail;
-	if (item != NULL) {
-		if (*head != *tail) {
-			/* more than one item: previous item becomes the tail */
-			*tail = (*tail)->prev;
-			(*tail)->next = NULL;
-			RF_ASSERT((*head)->prev == NULL);
-			RF_ASSERT((*tail)->next == NULL);
-		} else {
-			/* removing the only item leaves the queue empty */
-			*head = NULL;
-			*tail = NULL;
-		}
-		item->next = NULL;
-		item->prev = NULL;
-		if (rf_parityLogDebug)
-			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
-	}
-	if (*head) {
-		RF_ASSERT((*head)->prev == NULL);
-		RF_ASSERT((*tail)->next == NULL);
-	}
-
-	if (!ignoreLocks)
-		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-	return (item);
-}
-
-
-/*
- * Insert an in-core parity log item (data) at the tail of the queue
- * (*head, *tail), i.e. at the end DequeueParityLogData removes from.
- *
- * The queue is modified under the parityLogDiskQueue.mutex of the array
- * found through data->common->raidPtr.  NON-BLOCKING
- */
-static void
-RequeueParityLogData(
-	RF_ParityLogData_t * data,
-	RF_ParityLogData_t ** head,
-	RF_ParityLogData_t ** tail)
-{
-	RF_Raid_t *raidPtr;
-
-	/* check the pointer before it is dereferenced, not after */
-	RF_ASSERT(data);
-	raidPtr = data->common->raidPtr;
-	if (rf_parityLogDebug)
-		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
-	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-	if (*tail) {
-		/* append to tail of list */
-		data->prev = *tail;
-		data->next = NULL;
-		(*tail)->next = data;
-		*tail = data;
-	} else {
-		/* inserting into an empty list */
-		*head = data;
-		*tail = data;
-		(*head)->prev = NULL;
-		(*tail)->next = NULL;
-	}
-	RF_ASSERT((*head)->prev == NULL);
-	RF_ASSERT((*tail)->next == NULL);
-	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-}
-
-RF_ParityLogData_t *
-rf_CreateParityLogData(
- RF_ParityRecordType_t operation,
- RF_PhysDiskAddr_t * pda,
- caddr_t bufPtr,
- RF_Raid_t * raidPtr,
- int (*wakeFunc) (RF_DagNode_t * node, int status),
- void *wakeArg,
- RF_AccTraceEntry_t * tracerec,
- RF_Etimer_t startTime)
-{
- RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
- RF_CommonLogData_t *common;
- RF_PhysDiskAddr_t *diskAddress;
- int boundary, offset = 0; /* offset: running sector count, stored as each item's bufOffset */
-
- /* Return an initialized struct of info to be logged. Build one item
- * per physical disk address, one item per region.
- *
- * Every item points at one shared RF_CommonLogData_t that records the
- * operation, buffer, raidPtr, wake-up function and argument, trace
- * record and start time; common->cnt is incremented once per item.
- * A disk address whose first and last sectors map to different regions
- * is split at the region boundary, and that pda entry is modified in
- * place (startSector and numSector are adjusted past the part already
- * turned into an item). Items are inserted at the head of the result
- * list, and the head of that list is what is returned.
- *
- * NON-BLOCKING */
-
- diskAddress = pda;
- common = AllocParityLogCommonData(raidPtr);
- RF_ASSERT(common);
-
- common->operation = operation;
- common->bufPtr = bufPtr;
- common->raidPtr = raidPtr;
- common->wakeFunc = wakeFunc;
- common->wakeArg = wakeArg;
- common->tracerec = tracerec;
- common->startTime = startTime;
- common->cnt = 0;
-
- if (rf_parityLogDebug)
- printf("[entering CreateParityLogData]\n");
- while (diskAddress) {
- common->cnt++;
- data = AllocParityLogData(raidPtr);
- RF_ASSERT(data);
- data->common = common;
- data->next = NULL;
- data->prev = NULL;
- data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
- if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
- /* disk address does not cross a region boundary */
- data->diskAddress = *diskAddress;
- data->bufOffset = offset;
- offset = offset + diskAddress->numSector;
- EnqueueParityLogData(data, &resultHead, &resultTail);
- /* adjust disk address */
- diskAddress = diskAddress->next;
- } else {
- /* disk address crosses a region boundary */
- /* find address where region is crossed */
- boundary = 0;
- while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) /* one lookup per sector until the region changes */
- boundary++;
-
- /* enter data before the boundary */
- data->diskAddress = *diskAddress;
- data->diskAddress.numSector = boundary;
- data->bufOffset = offset;
- offset += boundary;
- EnqueueParityLogData(data, &resultHead, &resultTail);
- /* adjust disk address */
- diskAddress->startSector += boundary; /* same entry is revisited on the next pass with the remainder */
- diskAddress->numSector -= boundary;
- }
- }
- if (rf_parityLogDebug)
- printf("[leaving CreateParityLogData]\n");
- return (resultHead);
-}
-
-
-/*
- * Remove and return an in-core parity log item belonging to the given
- * region (regionID) from the queue (*head, *tail).  The queue is walked
- * backward from the tail and the first match is unlinked; the returned
- * item has its next and prev links cleared.  Returns NULL if no item
- * matches.
- *
- * Unless ignoreLocks is non-zero, the search runs under
- * raidPtr->parityLogDiskQueue.mutex, which is released again on every
- * return path (the found path used to return with it still held).
- *
- * NON-BLOCKING.
- */
-RF_ParityLogData_t *
-rf_SearchAndDequeueParityLogData(
-	RF_Raid_t * raidPtr,
-	int regionID,
-	RF_ParityLogData_t ** head,
-	RF_ParityLogData_t ** tail,
-	int ignoreLocks)
-{
-	RF_ParityLogData_t *w;
-
-	if (!ignoreLocks)
-		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-
-	/* walk backward through the list; w ends up NULL if nothing matches */
-	for (w = (*tail); w != NULL; w = w->prev) {
-		if (w->regionID != regionID)
-			continue;
-
-		/* remove the matching element from the list */
-		if (w == *tail) {
-			if (*head == *tail) {
-				/* removing only element in the list */
-				*head = NULL;
-				*tail = NULL;
-			} else {
-				/* removing last item in the list */
-				*tail = (*tail)->prev;
-				(*tail)->next = NULL;
-				RF_ASSERT((*head)->prev == NULL);
-				RF_ASSERT((*tail)->next == NULL);
-			}
-		} else if (w == *head) {
-			/* removing first item in the list */
-			*head = (*head)->next;
-			(*head)->prev = NULL;
-			RF_ASSERT((*head)->prev == NULL);
-			RF_ASSERT((*tail)->next == NULL);
-		} else {
-			/* removing an item from the middle of the list */
-			w->prev->next = w->next;
-			w->next->prev = w->prev;
-			RF_ASSERT((*head)->prev == NULL);
-			RF_ASSERT((*tail)->next == NULL);
-		}
-		w->prev = NULL;
-		w->next = NULL;
-		if (rf_parityLogDebug)
-			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
-		break;
-	}
-
-	/* single exit: the mutex is dropped whether or not a match was found */
-	if (!ignoreLocks)
-		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-	return (w);
-}
-
-/*
- * Remove the item at the tail of the queue (*head, *tail), then remove
- * every other queued item with the same regionID, and return them all
- * as a list chained through ->next (tail item first).  Returns NULL if
- * the queue is empty.
- *
- * Both helper calls are made with ignoreLocks set to RF_TRUE, so no
- * locking is done here.  NON-BLOCKING
- */
-static RF_ParityLogData_t *
-DequeueMatchingLogData(
-	RF_Raid_t * raidPtr,
-	RF_ParityLogData_t ** head,
-	RF_ParityLogData_t ** tail)
-{
-	RF_ParityLogData_t *first, *last;
-	int     regionID;
-
-	first = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
-	if (first == NULL)
-		return (first);
-
-	regionID = first->regionID;
-	/* keep appending matches until the search comes back empty */
-	for (last = first;; last = last->next) {
-		last->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
-		if (last->next == NULL)
-			break;
-	}
-	return (first);
-}
-
-
-/*
- * Take a log buffer from the pool (parityLogPool.parityLogs) and return
- * it, tagged with logData's regionID and with numRecords reset to 0.
- *
- * If the pool is empty, NULL is returned and logData is instead placed
- * on the queue of work waiting for log buffers (logBlockHead /
- * logBlockTail): at the tail when finish is non-zero, at the head
- * otherwise.  That queueing happens while parityLogPool.mutex is held,
- * to ensure synchronization with ReleaseParityLogs.  NON-BLOCKING
- */
-static RF_ParityLog_t *
-AcquireParityLog(
-	RF_ParityLogData_t * logData,
-	int finish)
-{
-	RF_ParityLog_t *buf;
-	RF_Raid_t *owner = logData->common->raidPtr;
-
-	RF_LOCK_MUTEX(owner->parityLogPool.mutex);
-	buf = owner->parityLogPool.parityLogs;
-	if (buf == NULL) {
-		/* no logs available: park the request on the log-blocked queue */
-		if (rf_parityLogDebug)
-			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
-		if (finish)
-			RequeueParityLogData(logData, &owner->parityLogDiskQueue.logBlockHead, &owner->parityLogDiskQueue.logBlockTail);
-		else
-			EnqueueParityLogData(logData, &owner->parityLogDiskQueue.logBlockHead, &owner->parityLogDiskQueue.logBlockTail);
-	} else {
-		/* unlink the first pool entry and hand it out */
-		owner->parityLogPool.parityLogs = buf->next;
-		buf->regionID = logData->regionID;
-		buf->numRecords = 0;
-		buf->next = NULL;
-		owner->logsInUse++;
-		RF_ASSERT(owner->logsInUse >= 0 && owner->logsInUse <= owner->numParityLogs);
-	}
-	RF_UNLOCK_MUTEX(owner->parityLogPool.mutex);
-	return (buf);
-}
-
-void
-rf_ReleaseParityLogs(
- RF_Raid_t * raidPtr,
- RF_ParityLog_t * firstLog)
-{
- RF_ParityLogData_t *logDataList;
- RF_ParityLog_t *log, *lastLog;
- int cnt;
-
- /* Insert a linked list of parity logs (firstLog) to the free list
- * (parityLogPool.parityLogs)
- *
- * Requests that are blocked waiting for a log buffer (the
- * logBlockHead/logBlockTail queue) are served first: each released
- * log is offered to rf_ParityLogAppend together with the blocked
- * items of one region. Only the logs still left over afterwards are
- * put back in the pool, and logsInUse is decremented for each of them.
- *
- * NON-BLOCKING. */
-
- RF_ASSERT(firstLog);
-
- /* Before returning logs to global free list, service all requests
- * which are blocked on logs. Holding mutexes for parityLogPool and
- * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
- RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
- log = firstLog;
- if (firstLog)
- firstLog = firstLog->next;
- log->numRecords = 0; /* NOTE(review): log is dereferenced unconditionally; only the RF_ASSERT above guards against a NULL firstLog */
- log->next = NULL;
- while (logDataList && log) {
- RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); /* both mutexes are dropped around the rf_ParityLogAppend call */
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
- if (rf_parityLogDebug)
- printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
- if (log == NULL) { /* log is NULL after the call (presumably taken over by the append): move on to the next released log */
- log = firstLog;
- if (firstLog) {
- firstLog = firstLog->next;
- log->numRecords = 0;
- log->next = NULL;
- }
- }
- RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- if (log)
- logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
- }
- /* return remaining logs to pool */
- if (log) {
- log->next = firstLog;
- firstLog = log;
- }
- if (firstLog) {
- lastLog = firstLog;
- raidPtr->logsInUse--;
- RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
- while (lastLog->next) { /* find the last remaining log, decrementing logsInUse once per log */
- lastLog = lastLog->next;
- raidPtr->logsInUse--;
- RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
- }
- lastLog->next = raidPtr->parityLogPool.parityLogs;
- raidPtr->parityLogPool.parityLogs = firstLog;
- cnt = 0; /* cnt: number of logs now in the pool, used only by the consistency assertion below */
- log = raidPtr->parityLogPool.parityLogs;
- while (log) {
- cnt++;
- log = log->next;
- }
- RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
- }
- RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-}
-
-static void
-ReintLog(
- RF_Raid_t * raidPtr,
- int regionID,
- RF_ParityLog_t * log)
-{
- RF_ASSERT(log);
-
- /* Insert an in-core parity log (log) into the disk queue of
- * reintegration work. Set the flag (reintInProgress) for the
- * specified region (regionID) to indicate that reintegration is in
- * progress for this region. NON-BLOCKING
- *
- * The flag is set under regionInfo[regionID].reintMutex. The log is
- * pushed onto the head of parityLogDiskQueue.reintQueue under
- * parityLogDiskQueue.mutex, which is acquired while reintMutex is
- * still held. parityLogDiskQueue.cond is signalled after both
- * mutexes have been released. */
-
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
- raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
- * complete */
-
- if (rf_parityLogDebug)
- printf("[requesting reintegration of region %d]\n", log->regionID); /* prints log->regionID, not the regionID argument */
- /* move record to reintegration queue */
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- log->next = raidPtr->parityLogDiskQueue.reintQueue;
- raidPtr->parityLogDiskQueue.reintQueue = log;
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
-}
-
-static void
-FlushLog(
- RF_Raid_t * raidPtr,
- RF_ParityLog_t * log)
-{
- /* insert a core log (log) into a list of logs
- * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
- * The log must be full (numRecords == raidPtr->numSectorsPerLog) and
- * unlinked (next == NULL). It is pushed onto the head of flushQueue
- * under parityLogDiskQueue.mutex, and parityLogDiskQueue.cond is
- * signalled after the mutex has been released.
- * NON-BLOCKING */
-
- RF_ASSERT(log);
- RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
- RF_ASSERT(log->next == NULL);
- /* move log to flush queue */
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- log->next = raidPtr->parityLogDiskQueue.flushQueue;
- raidPtr->parityLogDiskQueue.flushQueue = log;
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
-}
-
-static int
-DumpParityLogToDisk(
- int finish,
- RF_ParityLogData_t * logData)
-{
- int i, diskCount, regionID = logData->regionID;
- RF_ParityLog_t *log;
- RF_Raid_t *raidPtr;
-
- raidPtr = logData->common->raidPtr;
-
- /* Move a core log to disk. If the log disk is full, initiate
- * reintegration.
- *
- * The log moved is the region's current core log
- * (regionInfo[regionID].coreLog), which must be full. When the dump
- * can proceed, coreLog is set to NULL and the log is handed to
- * FlushLog (room left in the region) or ReintLog (region at
- * capacity). When the region is already being reintegrated, logData
- * is queued on reintBlockHead/reintBlockTail instead (at the tail if
- * finish is non-zero, at the head otherwise) and coreLog is left
- * untouched.
- *
- * Return (0) if we can enqueue the dump immediately, otherwise return
- * (1) to indicate we are blocked on reintegration and control of the
- * thread should be relinquished.
- *
- * Caller must hold regionInfo[regionID].mutex
- *
- * NON-BLOCKING */
-
- if (rf_parityLogDebug)
- printf("[dumping parity log to disk, region %d]\n", regionID);
- log = raidPtr->regionInfo[regionID].coreLog;
- RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
- RF_ASSERT(log->next == NULL);
-
- /* if reintegration is in progress, must queue work */
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
- if (raidPtr->regionInfo[regionID].reintInProgress) {
- /* Can not proceed since this region is currently being
- * reintegrated. We can not block, so queue remaining work and
- * return */
- if (rf_parityLogDebug)
- printf("[region %d waiting on reintegration]\n", regionID);
- /* XXX not sure about the use of finish - shouldn't this
- * always be "Enqueue"? */
- if (finish)
- RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
- else
- EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
- return (1); /* relinquish control of this thread */
- }
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
- raidPtr->regionInfo[regionID].coreLog = NULL; /* the region no longer owns this log; it goes to the flush or reint queue below */
- if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
- /* IMPORTANT!! this loop bound assumes region disk holds an
- * integral number of core logs */
- {
- /* update disk map for this region */
- diskCount = raidPtr->regionInfo[regionID].diskCount;
- for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
- raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
- raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
- }
- log->diskOffset = diskCount; /* diskCount value from before this log's records were added */
- raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
- FlushLog(raidPtr, log);
- } else {
- /* no room for log on disk, send it to disk manager and
- * request reintegration */
- RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
- ReintLog(raidPtr, regionID, log);
- }
- if (rf_parityLogDebug)
- printf("[finished dumping parity log to disk, region %d]\n", regionID);
- return (0);
-}
-
-int
-rf_ParityLogAppend(
- RF_ParityLogData_t * logData,
- int finish,
- RF_ParityLog_t ** incomingLog,
- int clearReintFlag)
-{
- int regionID, logItem, itemDone;
- RF_ParityLogData_t *item;
- int punt, done = RF_FALSE;
- RF_ParityLog_t *log;
- RF_Raid_t *raidPtr;
- RF_Etimer_t timer;
- int (*wakeFunc) (RF_DagNode_t * node, int status);
- void *wakeArg;
-
- /* Add parity to the appropriate log, one sector at a time. This
- * routine is called is called by dag functions ParityLogUpdateFunc
- * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
- *
- * Parity to be logged is contained in a linked-list (logData). When
- * this routine returns, every sector in the list will be in one of
- * three places: 1) entered into the parity log 2) queued, waiting on
- * reintegration 3) queued, waiting on a core log
- *
- * Blocked work is passed to the ParityLoggingDiskManager for completion.
- * Later, as conditions which required the block are removed, the work
- * reenters this routine with the "finish" parameter set to "RF_TRUE."
- *
- * NON-BLOCKING */
-
- RF_ASSERT(logData != NULL);
- raidPtr = logData->common->raidPtr;
- /* lock the region for the first item in logData */
- regionID = logData->regionID;
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
-
- if (clearReintFlag) {
- /* Enable flushing for this region. Holding both locks
- * provides a synchronization barrier with DumpParityLogToDisk */
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
- raidPtr->regionInfo[regionID].diskCount = 0;
- raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
- * enabled */
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- }
- /* process each item in logData */
- while (logData) {
- /* remove an item from logData */
- item = logData;
- logData = logData->next;
- item->next = NULL;
- item->prev = NULL;
-
- if (rf_parityLogDebug)
- printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
-
- /* see if we moved to a new region */
- if (regionID != item->regionID) {
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- regionID = item->regionID;
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
- }
- punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
- * can happen in one of two ways: 1) no core
- * log (AcquireParityLog) 2) waiting on
- * reintegration (DumpParityLogToDisk) If punt
- * is RF_TRUE, the dataItem was queued, so
- * skip to next item. */
-
- /* process item, one sector at a time, until all sectors
- * processed or we punt */
- if (item->diskAddress.numSector > 0)
- done = RF_FALSE;
- else
- RF_ASSERT(0);
- while (!punt && !done) {
- /* verify that a core log exists for this region */
- if (!raidPtr->regionInfo[regionID].coreLog) {
- /* Attempt to acquire a parity log. If
- * acquisition fails, queue remaining work in
- * data item and move to nextItem. */
- if (incomingLog)
- if (*incomingLog) {
- RF_ASSERT((*incomingLog)->next == NULL);
- raidPtr->regionInfo[regionID].coreLog = *incomingLog;
- raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
- *incomingLog = NULL;
- } else
- raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
- else
- raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
- /* Note: AcquireParityLog either returns a log
- * or enqueues currentItem */
- }
- if (!raidPtr->regionInfo[regionID].coreLog)
- punt = RF_TRUE; /* failed to find a core log */
- else {
- RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
- /* verify that the log has room for new
- * entries */
- /* if log is full, dump it to disk and grab a
- * new log */
- if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
- /* log is full, dump it to disk */
- if (DumpParityLogToDisk(finish, item))
- punt = RF_TRUE; /* dump unsuccessful,
- * blocked on
- * reintegration */
- else {
- /* dump was successful */
- if (incomingLog)
- if (*incomingLog) {
- RF_ASSERT((*incomingLog)->next == NULL);
- raidPtr->regionInfo[regionID].coreLog = *incomingLog;
- raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
- *incomingLog = NULL;
- } else
- raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
- else
- raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
- /* if a core log is not
- * available, must queue work
- * and return */
- if (!raidPtr->regionInfo[regionID].coreLog)
- punt = RF_TRUE; /* blocked on log
- * availability */
- }
- }
- }
- /* if we didn't punt on this item, attempt to add a
- * sector to the core log */
- if (!punt) {
- RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
- /* at this point, we have a core log with
- * enough room for a sector */
- /* copy a sector into the log */
- log = raidPtr->regionInfo[regionID].coreLog;
- RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
- logItem = log->numRecords++;
- log->records[logItem].parityAddr = item->diskAddress;
- RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
- RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
- log->records[logItem].parityAddr.numSector = 1;
- log->records[logItem].operation = item->common->operation;
- bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
- item->diskAddress.numSector--;
- item->diskAddress.startSector++;
- if (item->diskAddress.numSector == 0)
- done = RF_TRUE;
- }
- }
-
- if (!punt) {
- /* Processed this item completely, decrement count of
- * items to be processed. */
- RF_ASSERT(item->diskAddress.numSector == 0);
- RF_LOCK_MUTEX(item->common->mutex);
- item->common->cnt--;
- if (item->common->cnt == 0)
- itemDone = RF_TRUE;
- else
- itemDone = RF_FALSE;
- RF_UNLOCK_MUTEX(item->common->mutex);
- if (itemDone) {
- /* Finished processing all log data for this
- * IO Return structs to free list and invoke
- * wakeup function. */
- timer = item->common->startTime; /* grab initial value of
- * timer */
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
- if (rf_parityLogDebug)
- printf("[waking process for region %d]\n", item->regionID);
- wakeFunc = item->common->wakeFunc;
- wakeArg = item->common->wakeArg;
- FreeParityLogCommonData(item->common);
- FreeParityLogData(item);
- (wakeFunc) (wakeArg, 0);
- } else
- FreeParityLogData(item);
- }
- }
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- if (rf_parityLogDebug)
- printf("[exiting ParityLogAppend]\n");
- return (0);
-}
-
-
-void
-rf_EnableParityLogging(RF_Raid_t * raidPtr)
-{
- int regionID;
-
- for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- }
- if (rf_parityLogDebug)
- printf("[parity logging enabled]\n");
-}
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
diff --git a/sys/dev/raidframe/rf_paritylog.h b/sys/dev/raidframe/rf_paritylog.h
deleted file mode 100644
index 1f2b80d..0000000
--- a/sys/dev/raidframe/rf_paritylog.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_paritylog.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* header file for parity log
- *
- */
-
-#ifndef _RF__RF_PARITYLOG_H_
-#define _RF__RF_PARITYLOG_H_
-
-#include <dev/raidframe/rf_types.h>
-
-#define RF_DEFAULT_NUM_SECTORS_PER_LOG 64
-
-typedef int RF_RegionId_t;
-
-typedef enum RF_ParityRecordType_e {
- RF_STOP,
- RF_UPDATE,
- RF_OVERWRITE
-} RF_ParityRecordType_t;
-
-struct RF_CommonLogData_s {
- RF_DECLARE_MUTEX(mutex) /* protects cnt */
- int cnt; /* when 0, time to call wakeFunc */
- RF_Raid_t *raidPtr;
-/* int (*wakeFunc)(RF_Buf_t); */
- int (*wakeFunc) (RF_DagNode_t * node, int status);
- void *wakeArg;
- RF_AccTraceEntry_t *tracerec;
- RF_Etimer_t startTime;
- caddr_t bufPtr;
- RF_ParityRecordType_t operation;
- RF_CommonLogData_t *next;
-};
-
-struct RF_ParityLogData_s {
- RF_RegionId_t regionID; /* this struct guaranteed to span a single
- * region */
- int bufOffset; /* offset from common->bufPtr */
- RF_PhysDiskAddr_t diskAddress;
- RF_CommonLogData_t *common; /* info shared by one or more
- * parityLogData structs */
- RF_ParityLogData_t *next;
- RF_ParityLogData_t *prev;
-};
-
-struct RF_ParityLogAppendQueue_s {
- RF_DECLARE_MUTEX(mutex)
-};
-
-struct RF_ParityLogRecord_s {
- RF_PhysDiskAddr_t parityAddr;
- RF_ParityRecordType_t operation;
-};
-
-struct RF_ParityLog_s {
- RF_RegionId_t regionID;
- int numRecords;
- int diskOffset;
- RF_ParityLogRecord_t *records;
- caddr_t bufPtr;
- RF_ParityLog_t *next;
-};
-
-struct RF_ParityLogQueue_s {
- RF_DECLARE_MUTEX(mutex)
- RF_ParityLog_t *parityLogs;
-};
-
-struct RF_RegionBufferQueue_s {
- RF_DECLARE_MUTEX(mutex)
- RF_DECLARE_COND(cond)
- int bufferSize;
- int totalBuffers; /* size of array 'buffers' */
- int availableBuffers; /* num available 'buffers' */
- int emptyBuffersIndex; /* stick next freed buffer here */
- int availBuffersIndex; /* grab next buffer from here */
- caddr_t *buffers; /* array buffers used to hold parity */
-};
-#define RF_PLOG_CREATED (1<<0)/* thread is created */
-#define RF_PLOG_RUNNING (1<<1)/* thread is running */
-#define RF_PLOG_TERMINATE (1<<2)/* thread is terminated (should exit) */
-#define RF_PLOG_SHUTDOWN (1<<3)/* thread is aware and exiting/exited */
-
-struct RF_ParityLogDiskQueue_s {
- RF_DECLARE_MUTEX(mutex) /* protects all vars in this struct */
- RF_DECLARE_COND(cond)
- int threadState; /* is thread running, should it shutdown (see
- * above) */
- RF_ParityLog_t *flushQueue; /* list of parity logs to be flushed
- * to log disk */
- RF_ParityLog_t *reintQueue; /* list of parity logs waiting to be
- * reintegrated */
- RF_ParityLogData_t *bufHead; /* head of FIFO list of log data,
- * waiting on a buffer */
- RF_ParityLogData_t *bufTail; /* tail of FIFO list of log data,
- * waiting on a buffer */
- RF_ParityLogData_t *reintHead; /* head of FIFO list of log data,
- * waiting on reintegration */
- RF_ParityLogData_t *reintTail; /* tail of FIFO list of log data,
- * waiting on reintegration */
- RF_ParityLogData_t *logBlockHead; /* queue of work, blocked
- * until a log is available */
- RF_ParityLogData_t *logBlockTail;
- RF_ParityLogData_t *reintBlockHead; /* queue of work, blocked
- * until reintegration is
- * complete */
- RF_ParityLogData_t *reintBlockTail;
- RF_CommonLogData_t *freeCommonList; /* list of unused common data
- * structs */
- RF_ParityLogData_t *freeDataList; /* list of unused log data
- * structs */
-};
-
-struct RF_DiskMap_s {
- RF_PhysDiskAddr_t parityAddr;
- RF_ParityRecordType_t operation;
-};
-
-struct RF_RegionInfo_s {
- RF_DECLARE_MUTEX(mutex) /* protects: diskCount, diskMap,
- * loggingEnabled, coreLog */
- RF_DECLARE_MUTEX(reintMutex) /* protects: reintInProgress */
- int reintInProgress;/* flag used to suspend flushing operations */
- RF_SectorCount_t capacity; /* capacity of this region in sectors */
- RF_SectorNum_t regionStartAddr; /* starting disk address for this
- * region */
- RF_SectorNum_t parityStartAddr; /* starting disk address for this
- * region */
- RF_SectorCount_t numSectorsParity; /* number of parity sectors
- * protected by this region */
- RF_SectorCount_t diskCount; /* num of sectors written to this
- * region's disk log */
- RF_DiskMap_t *diskMap; /* in-core map of what's in this region's disk
- * log */
- int loggingEnabled; /* logging enable for this region */
- RF_ParityLog_t *coreLog;/* in-core log for this region */
-};
-
-RF_ParityLogData_t *
-rf_CreateParityLogData(RF_ParityRecordType_t operation,
- RF_PhysDiskAddr_t * pda, caddr_t bufPtr, RF_Raid_t * raidPtr,
- int (*wakeFunc) (RF_DagNode_t * node, int status),
- void *wakeArg, RF_AccTraceEntry_t * tracerec,
- RF_Etimer_t startTime);
- RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t * raidPtr,
- RF_RegionId_t regionID, RF_ParityLogData_t ** head,
- RF_ParityLogData_t ** tail, int ignoreLocks);
- void rf_ReleaseParityLogs(RF_Raid_t * raidPtr, RF_ParityLog_t * firstLog);
- int rf_ParityLogAppend(RF_ParityLogData_t * logData, int finish,
- RF_ParityLog_t ** incomingLog, int clearReintFlag);
- void rf_EnableParityLogging(RF_Raid_t * raidPtr);
-
-#endif /* !_RF__RF_PARITYLOG_H_ */
diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.c b/sys/dev/raidframe/rf_paritylogDiskMgr.c
deleted file mode 100644
index 5d864e2..0000000
--- a/sys/dev/raidframe/rf_paritylogDiskMgr.c
+++ /dev/null
@@ -1,703 +0,0 @@
-/* $NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/* Code for flushing and reintegration operations related to parity logging.
- *
- */
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_paritylog.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_paritylogging.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_kintf.h>
-
-#include <dev/raidframe/rf_paritylogDiskMgr.h>
-
-static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
-
-static caddr_t
-AcquireReintBuffer(pool)
- RF_RegionBufferQueue_t *pool;
-{
- caddr_t bufPtr = NULL;
-
- /* Return a region buffer from the free list (pool). If the free list
- * is empty, WAIT. BLOCKING */
-
- RF_LOCK_MUTEX(pool->mutex);
- if (pool->availableBuffers > 0) {
- bufPtr = pool->buffers[pool->availBuffersIndex];
- pool->availableBuffers--;
- pool->availBuffersIndex++;
- if (pool->availBuffersIndex == pool->totalBuffers)
- pool->availBuffersIndex = 0;
- RF_UNLOCK_MUTEX(pool->mutex);
- } else {
- RF_PANIC(); /* should never happen in correct config,
- * single reint */
- RF_WAIT_COND(pool->cond, pool->mutex);
- }
- return (bufPtr);
-}
-
-static void
-ReleaseReintBuffer(
- RF_RegionBufferQueue_t * pool,
- caddr_t bufPtr)
-{
- /* Insert a region buffer (bufPtr) into the free list (pool).
- * NON-BLOCKING */
-
- RF_LOCK_MUTEX(pool->mutex);
- pool->availableBuffers++;
- pool->buffers[pool->emptyBuffersIndex] = bufPtr;
- pool->emptyBuffersIndex++;
- if (pool->emptyBuffersIndex == pool->totalBuffers)
- pool->emptyBuffersIndex = 0;
- RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
- RF_UNLOCK_MUTEX(pool->mutex);
- RF_SIGNAL_COND(pool->cond);
-}
-
-
-
-static void
-ReadRegionLog(
- RF_RegionId_t regionID,
- RF_MCPair_t * rrd_mcpair,
- caddr_t regionBuffer,
- RF_Raid_t * raidPtr,
- RF_DagHeader_t ** rrd_dag_h,
- RF_AllocListElem_t ** rrd_alloclist,
- RF_PhysDiskAddr_t ** rrd_pda)
-{
- /* Initiate the read a region log from disk. Once initiated, return
- * to the calling routine.
- *
- * NON-BLOCKING */
-
- RF_AccTraceEntry_t *tracerec;
- RF_DagNode_t *rrd_rdNode;
-
- /* create DAG to read region log from disk */
- rf_MakeAllocList(*rrd_alloclist);
- *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer,
- rf_DiskReadFunc, rf_DiskReadUndoFunc,
- "Rrl", *rrd_alloclist,
- RF_DAG_FLAGS_NONE,
- RF_IO_NORMAL_PRIORITY);
-
- /* create and initialize PDA for the core log */
- /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
- * *)); */
- *rrd_pda = rf_AllocPDAList(1);
- rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row),
- &((*rrd_pda)->col), &((*rrd_pda)->startSector));
- (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
-
- if ((*rrd_pda)->next) {
- (*rrd_pda)->next = NULL;
- printf("set rrd_pda->next to NULL\n");
- }
- /* initialize DAG parameters */
- RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
- bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
- (*rrd_dag_h)->tracerec = tracerec;
- rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
- rrd_rdNode->params[0].p = *rrd_pda;
-/* rrd_rdNode->params[1] = regionBuffer; */
- rrd_rdNode->params[2].v = 0;
- rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, 0, 0);
-
- /* launch region log read dag */
- rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) rrd_mcpair);
-}
-
-
-
-static void
-WriteCoreLog(
- RF_ParityLog_t * log,
- RF_MCPair_t * fwr_mcpair,
- RF_Raid_t * raidPtr,
- RF_DagHeader_t ** fwr_dag_h,
- RF_AllocListElem_t ** fwr_alloclist,
- RF_PhysDiskAddr_t ** fwr_pda)
-{
- RF_RegionId_t regionID = log->regionID;
- RF_AccTraceEntry_t *tracerec;
- RF_SectorNum_t regionOffset;
- RF_DagNode_t *fwr_wrNode;
-
- /* Initiate the write of a core log to a region log disk. Once
- * initiated, return to the calling routine.
- *
- * NON-BLOCKING */
-
- /* create DAG to write a core log to a region log disk */
- rf_MakeAllocList(*fwr_alloclist);
- *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr,
- rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
- "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
-
- /* create and initialize PDA for the region log */
- /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
- * *)); */
- *fwr_pda = rf_AllocPDAList(1);
- regionOffset = log->diskOffset;
- rf_MapLogParityLogging(raidPtr, regionID, regionOffset,
- &((*fwr_pda)->row), &((*fwr_pda)->col),
- &((*fwr_pda)->startSector));
- (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
-
- /* initialize DAG parameters */
- RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
- bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
- (*fwr_dag_h)->tracerec = tracerec;
- fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
- fwr_wrNode->params[0].p = *fwr_pda;
-/* fwr_wrNode->params[1] = log->bufPtr; */
- fwr_wrNode->params[2].v = 0;
- fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, 0, 0);
-
- /* launch the dag to write the core log to disk */
- rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) fwr_mcpair);
-}
-
-
-static void
-ReadRegionParity(
- RF_RegionId_t regionID,
- RF_MCPair_t * prd_mcpair,
- caddr_t parityBuffer,
- RF_Raid_t * raidPtr,
- RF_DagHeader_t ** prd_dag_h,
- RF_AllocListElem_t ** prd_alloclist,
- RF_PhysDiskAddr_t ** prd_pda)
-{
- /* Initiate the read region parity from disk. Once initiated, return
- * to the calling routine.
- *
- * NON-BLOCKING */
-
- RF_AccTraceEntry_t *tracerec;
- RF_DagNode_t *prd_rdNode;
-
- /* create DAG to read region parity from disk */
- rf_MakeAllocList(*prd_alloclist);
- *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc,
- rf_DiskReadUndoFunc, "Rrp",
- *prd_alloclist, RF_DAG_FLAGS_NONE,
- RF_IO_NORMAL_PRIORITY);
-
- /* create and initialize PDA for region parity */
- /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
- * *)); */
- *prd_pda = rf_AllocPDAList(1);
- rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row),
- &((*prd_pda)->col), &((*prd_pda)->startSector),
- &((*prd_pda)->numSector));
- if (rf_parityLogDebug)
- printf("[reading %d sectors of parity from region %d]\n",
- (int) (*prd_pda)->numSector, regionID);
- if ((*prd_pda)->next) {
- (*prd_pda)->next = NULL;
- printf("set prd_pda->next to NULL\n");
- }
- /* initialize DAG parameters */
- RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
- bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
- (*prd_dag_h)->tracerec = tracerec;
- prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
- prd_rdNode->params[0].p = *prd_pda;
- prd_rdNode->params[1].p = parityBuffer;
- prd_rdNode->params[2].v = 0;
- prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, 0, 0);
- if (rf_validateDAGDebug)
- rf_ValidateDAG(*prd_dag_h);
- /* launch region parity read dag */
- rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) prd_mcpair);
-}
-
-static void
-WriteRegionParity(
- RF_RegionId_t regionID,
- RF_MCPair_t * pwr_mcpair,
- caddr_t parityBuffer,
- RF_Raid_t * raidPtr,
- RF_DagHeader_t ** pwr_dag_h,
- RF_AllocListElem_t ** pwr_alloclist,
- RF_PhysDiskAddr_t ** pwr_pda)
-{
- /* Initiate the write of region parity to disk. Once initiated, return
- * to the calling routine.
- *
- * NON-BLOCKING */
-
- RF_AccTraceEntry_t *tracerec;
- RF_DagNode_t *pwr_wrNode;
-
- /* create DAG to write region log from disk */
- rf_MakeAllocList(*pwr_alloclist);
- *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer,
- rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
- "Wrp", *pwr_alloclist,
- RF_DAG_FLAGS_NONE,
- RF_IO_NORMAL_PRIORITY);
-
- /* create and initialize PDA for region parity */
- /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
- * *)); */
- *pwr_pda = rf_AllocPDAList(1);
- rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row),
- &((*pwr_pda)->col), &((*pwr_pda)->startSector),
- &((*pwr_pda)->numSector));
-
- /* initialize DAG parameters */
- RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
- bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
- (*pwr_dag_h)->tracerec = tracerec;
- pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
- pwr_wrNode->params[0].p = *pwr_pda;
-/* pwr_wrNode->params[1] = parityBuffer; */
- pwr_wrNode->params[2].v = 0;
- pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
- 0, 0, 0);
-
- /* launch the dag to write region parity to disk */
- rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) pwr_mcpair);
-}
-
-static void
-FlushLogsToDisk(
- RF_Raid_t * raidPtr,
- RF_ParityLog_t * logList)
-{
- /* Flush a linked list of core logs to the log disk. Logs contain the
- * disk location where they should be written. Logs were written in
- * FIFO order and that order must be preserved.
- *
- * Recommended optimizations: 1) allow multiple flushes to occur
- * simultaneously 2) coalesce contiguous flush operations
- *
- * BLOCKING */
-
- RF_ParityLog_t *log;
- RF_RegionId_t regionID;
- RF_MCPair_t *fwr_mcpair;
- RF_DagHeader_t *fwr_dag_h;
- RF_AllocListElem_t *fwr_alloclist;
- RF_PhysDiskAddr_t *fwr_pda;
-
- fwr_mcpair = rf_AllocMCPair();
- RF_LOCK_MUTEX(fwr_mcpair->mutex);
-
- RF_ASSERT(logList);
- log = logList;
- while (log) {
- regionID = log->regionID;
-
- /* create and launch a DAG to write the core log */
- if (rf_parityLogDebug)
- printf("[initiating write of core log for region %d]\n", regionID);
- fwr_mcpair->flag = RF_FALSE;
- WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h,
- &fwr_alloclist, &fwr_pda);
-
- /* wait for the DAG to complete */
- while (!fwr_mcpair->flag)
- RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
- if (fwr_dag_h->status != rf_enable) {
- RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
- RF_ASSERT(0);
- }
- /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
- rf_FreePhysDiskAddr(fwr_pda);
- rf_FreeDAG(fwr_dag_h);
- rf_FreeAllocList(fwr_alloclist);
-
- log = log->next;
- }
- RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
- rf_FreeMCPair(fwr_mcpair);
- rf_ReleaseParityLogs(raidPtr, logList);
-}
-
-static void
-ReintegrateRegion(
- RF_Raid_t * raidPtr,
- RF_RegionId_t regionID,
- RF_ParityLog_t * coreLog)
-{
- RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
- RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
- RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
- RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
- caddr_t parityBuffer, regionBuffer = NULL;
-
- /* Reintegrate a region (regionID).
- *
- * 1. acquire region and parity buffers
- * 2. read log from disk
- * 3. read parity from disk
- * 4. apply log to parity
- * 5. apply core log to parity
- * 6. write new parity to disk
- *
- * BLOCKING */
-
- if (rf_parityLogDebug)
- printf("[reintegrating region %d]\n", regionID);
-
- /* initiate read of region parity */
- if (rf_parityLogDebug)
- printf("[initiating read of parity for region %d]\n",regionID);
- parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
- prd_mcpair = rf_AllocMCPair();
- RF_LOCK_MUTEX(prd_mcpair->mutex);
- prd_mcpair->flag = RF_FALSE;
- ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr,
- &prd_dag_h, &prd_alloclist, &prd_pda);
-
- /* if region log nonempty, initiate read */
- if (raidPtr->regionInfo[regionID].diskCount > 0) {
- if (rf_parityLogDebug)
- printf("[initiating read of disk log for region %d]\n",
- regionID);
- regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
- rrd_mcpair = rf_AllocMCPair();
- RF_LOCK_MUTEX(rrd_mcpair->mutex);
- rrd_mcpair->flag = RF_FALSE;
- ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr,
- &rrd_dag_h, &rrd_alloclist, &rrd_pda);
- }
- /* wait on read of region parity to complete */
- while (!prd_mcpair->flag) {
- RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
- }
- RF_UNLOCK_MUTEX(prd_mcpair->mutex);
- if (prd_dag_h->status != rf_enable) {
- RF_ERRORMSG("Unable to read parity from disk\n");
- /* add code to fail the parity disk */
- RF_ASSERT(0);
- }
- /* apply core log to parity */
- /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
-
- if (raidPtr->regionInfo[regionID].diskCount > 0) {
- /* wait on read of region log to complete */
- while (!rrd_mcpair->flag)
- RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
- RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
- if (rrd_dag_h->status != rf_enable) {
- RF_ERRORMSG("Unable to read region log from disk\n");
- /* add code to fail the log disk */
- RF_ASSERT(0);
- }
- /* apply region log to parity */
- /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
- /* release resources associated with region log */
- /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
- rf_FreePhysDiskAddr(rrd_pda);
- rf_FreeDAG(rrd_dag_h);
- rf_FreeAllocList(rrd_alloclist);
- rf_FreeMCPair(rrd_mcpair);
- ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
- }
- /* write reintegrated parity to disk */
- if (rf_parityLogDebug)
- printf("[initiating write of parity for region %d]\n",
- regionID);
- pwr_mcpair = rf_AllocMCPair();
- RF_LOCK_MUTEX(pwr_mcpair->mutex);
- pwr_mcpair->flag = RF_FALSE;
- WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr,
- &pwr_dag_h, &pwr_alloclist, &pwr_pda);
- while (!pwr_mcpair->flag)
- RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
- RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
- if (pwr_dag_h->status != rf_enable) {
- RF_ERRORMSG("Unable to write parity to disk\n");
- /* add code to fail the parity disk */
- RF_ASSERT(0);
- }
- /* release resources associated with read of old parity */
- /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
- rf_FreePhysDiskAddr(prd_pda);
- rf_FreeDAG(prd_dag_h);
- rf_FreeAllocList(prd_alloclist);
- rf_FreeMCPair(prd_mcpair);
-
- /* release resources associated with write of new parity */
- ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
- /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
- rf_FreePhysDiskAddr(pwr_pda);
- rf_FreeDAG(pwr_dag_h);
- rf_FreeAllocList(pwr_alloclist);
- rf_FreeMCPair(pwr_mcpair);
-
- if (rf_parityLogDebug)
- printf("[finished reintegrating region %d]\n", regionID);
-}
-
-
-/*
- * Walk logList one parity log at a time.  For each log: reintegrate its
- * region with ReintegrateRegion, dequeue every log-data item that was
- * blocked on reintegration of that region, and pass the resulting chain to
- * rf_ParityLogAppend.  If nothing was blocked, reset the region's diskCount
- * and reintInProgress here instead.  Logs still referenced by `log' at the
- * end of an iteration are collected and handed to rf_ReleaseParityLogs.
- *
- * raidPtr - array whose regions are being reintegrated
- * logList - non-NULL (asserted) singly linked list of logs, linked by ->next
- */
-static void
-ReintegrateLogs(
- RF_Raid_t * raidPtr,
- RF_ParityLog_t * logList)
-{
- RF_ParityLog_t *log, *freeLogList = NULL;
- RF_ParityLogData_t *logData, *logDataList;
- RF_RegionId_t regionID;
-
- RF_ASSERT(logList);
- while (logList) {
- log = logList;
- logList = logList->next;
- log->next = NULL; /* detach this log from the input list */
- regionID = log->regionID;
- ReintegrateRegion(raidPtr, regionID, log);
- log->numRecords = 0; /* reset the log's record count */
-
- /* remove all items which are blocked on reintegration of this
- * region */
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID,
- &raidPtr->parityLogDiskQueue.reintBlockHead,
- &raidPtr->parityLogDiskQueue.reintBlockTail,
- RF_TRUE);
- logDataList = logData; /* head of the chain built by the loop below */
- while (logData) {
- logData->next = rf_SearchAndDequeueParityLogData(
- raidPtr, regionID,
- &raidPtr->parityLogDiskQueue.reintBlockHead,
- &raidPtr->parityLogDiskQueue.reintBlockTail,
- RF_TRUE);
- logData = logData->next;
- }
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-
- /* process blocked log data and clear reintInProgress flag for
- * this region */
- if (logDataList)
- rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); /* NOTE(review):
- * presumably sets log to NULL when it consumes the log (see
- * the if (log) test below) -- confirm in rf_ParityLogAppend */
- else {
- /* Enable flushing for this region. Holding both
- * locks provides a synchronization barrier with
- * DumpParityLogToDisk */
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- raidPtr->regionInfo[regionID].diskCount = 0;
- raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
- RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
- * enabled */
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- }
- /* if log wasn't used, attach it to the list of logs to be
- * returned */
- if (log) {
- log->next = freeLogList;
- freeLogList = log;
- }
- }
- if (freeLogList)
- rf_ReleaseParityLogs(raidPtr, freeLogList); /* return all unused logs in one call */
-}
-
-/*
- * Shut down parity logging for an array.
- *
- * When rf_forceParityLogReint is set, every region has logging disabled and
- * its in-core log detached while the region mutex is held; the region is
- * then reintegrated if it had either an in-core log or sectors on the log
- * disk, and the detached log (if any) is released.
- *
- * Always returns 0.
- */
-int
-rf_ShutdownLogging(RF_Raid_t * raidPtr)
-{
-    RF_ParityLog_t *coreLog;
-    RF_SectorCount_t sectorsLogged;
-    RF_RegionId_t rid;
-
-    if (rf_parityLogDebug)
-        printf("[shutting down parity logging]\n");
-
-    /* Parity log maps are volatile, so all regions must be reintegrated. */
-    if (rf_forceParityLogReint) {
-        rid = 0;
-        while (rid < rf_numParityRegions) {
-            /* snapshot and detach the region's state under its mutex */
-            RF_LOCK_MUTEX(raidPtr->regionInfo[rid].mutex);
-            raidPtr->regionInfo[rid].loggingEnabled = RF_FALSE;
-            coreLog = raidPtr->regionInfo[rid].coreLog;
-            raidPtr->regionInfo[rid].coreLog = NULL;
-            sectorsLogged = raidPtr->regionInfo[rid].diskCount;
-            RF_UNLOCK_MUTEX(raidPtr->regionInfo[rid].mutex);
-
-            if (coreLog != NULL || sectorsLogged > 0)
-                ReintegrateRegion(raidPtr, rid, coreLog);
-            if (coreLog != NULL)
-                rf_ReleaseParityLogs(raidPtr, coreLog);
-            rid++;
-        }
-    }
-
-    if (rf_parityLogDebug) {
-        printf("[parity logging disabled]\n");
-        printf("[should be done!]\n");
-    }
-    return (0);
-}
-/*
- * Body of the parity logging disk thread.
- *
- * Sets RF_PLOG_RUNNING and signals the queue's condition variable, then
- * loops: take over the flush and reintegration queues, process them with
- * the queue mutex dropped (FlushLogsToDisk / ReintegrateLogs), and sleep on
- * the condition variable when both queues are empty.  When
- * RF_PLOG_TERMINATE is seen with no work pending it calls
- * rf_ShutdownLogging, sets RF_PLOG_SHUTDOWN, signals again and exits.
- *
- * Declared int, but there is no return statement: the body ends in
- * kthread_exit(0), which the comment at the call marks as not returning.
- */
-int
-rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr)
-{
- RF_ParityLog_t *reintQueue, *flushQueue;
- int workNeeded, done = RF_FALSE;
- int s; /* value returned by splbio(), handed back to splx() on exit */
-
- /* Main program for parity logging disk thread. This routine waits
- * for work to appear in either the flush or reintegration queues and
- * is responsible for flushing core logs to the log disk as well as
- * reintegrating parity regions.
- *
- * BLOCKING */
-
- s = splbio();
-
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-
- /*
- * Inform our creator that we're running. Don't bother doing the
- * mutex lock/unlock dance- we locked above, and we'll unlock
- * below with nothing to do, yet.
- */
- raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
- RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
-
- /* empty the work queues */
- flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
- raidPtr->parityLogDiskQueue.flushQueue = NULL;
- reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
- raidPtr->parityLogDiskQueue.reintQueue = NULL;
- workNeeded = (flushQueue || reintQueue);
-
- while (!done) {
- while (workNeeded) {
- /* First, flush all logs in the flush queue, freeing
- * buffers Second, reintegrate all regions which are
- * reported as full. Third, append queued log data
- * until blocked.
- *
- * Note: Incoming appends (ParityLogAppend) can block on
- * either 1. empty buffer pool 2. region under
- * reintegration To preserve a global FIFO ordering of
- * appends, buffers are not released to the world
- * until those appends blocked on buffers are removed
- * from the append queue. Similarly, regions which
- * are reintegrated are not opened for general use
- * until the append queue has been emptied. */
-
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); /* queue mutex is not held
- * while the private queue copies are processed */
-
- /* empty flushQueue, using free'd log buffers to
- * process bufTail */
- if (flushQueue)
- FlushLogsToDisk(raidPtr, flushQueue);
-
- /* empty reintQueue, flushing from reintTail as we go */
- if (reintQueue)
- ReintegrateLogs(raidPtr, reintQueue);
-
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- flushQueue = raidPtr->parityLogDiskQueue.flushQueue; /* pick up work queued meanwhile */
- raidPtr->parityLogDiskQueue.flushQueue = NULL;
- reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
- raidPtr->parityLogDiskQueue.reintQueue = NULL;
- workNeeded = (flushQueue || reintQueue);
- }
- /* no work is needed at this point */
- if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
- /* shutdown parity logging 1. disable parity logging
- * in all regions 2. reintegrate all regions */
- done = RF_TRUE; /* thread disabled, no work needed */
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- rf_ShutdownLogging(raidPtr); /* return value (always 0 in this file) is ignored */
- }
- if (!done) {
- /* thread enabled, no work needed, so sleep */
- if (rf_parityLogDebug)
- printf("[parity logging disk manager sleeping]\n");
- RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
- raidPtr->parityLogDiskQueue.mutex);
- if (rf_parityLogDebug)
- printf("[parity logging disk manager just woke up]\n");
- flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
- raidPtr->parityLogDiskQueue.flushQueue = NULL;
- reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
- raidPtr->parityLogDiskQueue.reintQueue = NULL;
- workNeeded = (flushQueue || reintQueue);
- }
- }
- /*
- * Announce that we're done.
- */
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); /* the terminate branch above released it */
- raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
-
- splx(s);
-
- /*
- * In the NetBSD kernel, the thread must exit; returning would
- * cause the proc trampoline to attempt to return to userspace.
- */
- kthread_exit(0); /* does not return */
-}
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.h b/sys/dev/raidframe/rf_paritylogDiskMgr.h
deleted file mode 100644
index bdcc2a5..0000000
--- a/sys/dev/raidframe/rf_paritylogDiskMgr.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_paritylogDiskMgr.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* header file for parity log disk mgr code
- *
- */
-
-#ifndef _RF__RF_PARITYLOGDISKMGR_H_
-#define _RF__RF_PARITYLOGDISKMGR_H_
-
-#include <dev/raidframe/rf_types.h>
-
-int rf_ShutdownLogging(RF_Raid_t * raidPtr);
-int rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr);
-
-#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */
diff --git a/sys/dev/raidframe/rf_paritylogging.c b/sys/dev/raidframe/rf_paritylogging.c
deleted file mode 100644
index 2f9cf5e..0000000
--- a/sys/dev/raidframe/rf_paritylogging.c
+++ /dev/null
@@ -1,1076 +0,0 @@
-/* $NetBSD: rf_paritylogging.c,v 1.10 2000/02/12 16:06:27 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-
-/*
- parity logging configuration, dag selection, and mapping is implemented here
- */
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_paritylog.h>
-#include <dev/raidframe/rf_paritylogDiskMgr.h>
-#include <dev/raidframe/rf_paritylogging.h>
-#include <dev/raidframe/rf_parityloggingdags.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_shutdown.h>
-#include <dev/raidframe/rf_kintf.h>
-/*
- * Layout-specific configuration for parity logging.  An instance is
- * allocated by rf_ConfigureParityLogging and stored in
- * layoutPtr->layoutSpecificInfo.
- */
-typedef struct RF_ParityLoggingConfigInfo_s {
- RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by
- * IdentifyStripe; built as a
- * numCol x numCol table via
- * rf_make_2d_array */
-} RF_ParityLoggingConfigInfo_t;
-
-static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID);
-static void rf_ShutdownParityLogging(RF_ThreadArg_t arg);
-static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg);
-static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg);
-static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg);
-static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg);
-static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg);
-
-/*
- * Configure the parity logging layout for an array.
- *
- * Fills in the layout parameters, sizes the parity regions, allocates the
- * in-core parity log pool, the region and parity buffer pools, the disk
- * queue and the per-region info, starts the parity logging disk thread
- * (rf_ParityLoggingDiskManager), waits for it to report RF_PLOG_RUNNING and
- * finally enables parity logging.  A shutdown-list entry is registered
- * after each major stage so a failed configuration can be backed out.
- *
- * listp   - shutdown list that receives the cleanup entries
- * raidPtr - array being configured
- * cfgPtr  - not referenced by this routine
- *
- * Returns 0 on success, EINVAL if rf_numParityRegions <= 0, ENOMEM on an
- * allocation/initialisation failure, or the rc of a failed
- * rf_ShutdownCreate / managed mutex / managed cond call.
- *
- * NOTE(review): raidPtr->regionInfo is allocated with RF_Malloc and is only
- * covered by a shutdown entry near the end; the earlier error returns appear
- * to leave it allocated -- confirm against the caller's cleanup path.
- */
-int
-rf_ConfigureParityLogging(
-    RF_ShutdownList_t ** listp,
-    RF_Raid_t * raidPtr,
-    RF_Config_t * cfgPtr)
-{
-    int i, j, startdisk, rc;
-    RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity;
-    RF_SectorCount_t parityBufferCapacity, maxRegionParityRange;
-    RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
-    RF_ParityLoggingConfigInfo_t *info;
-    RF_ParityLog_t *l = NULL, *next;
-    caddr_t lHeapPtr;
-
-    if (rf_numParityRegions <= 0)
-        return(EINVAL);
-
-    /*
-     * We create multiple entries on the shutdown list here, since
-     * this configuration routine is fairly complicated in and of
-     * itself, and this makes backing out of a failed configuration
-     * much simpler.
-     */
-
-    raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG;
-
-    /* create a parity logging configuration structure */
-    RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t),
-        (RF_ParityLoggingConfigInfo_t *),
-        raidPtr->cleanupList);
-    if (info == NULL)
-        return (ENOMEM);
-    layoutPtr->layoutSpecificInfo = (void *) info;
-
-    RF_ASSERT(raidPtr->numRow == 1);
-
-    /* the stripe identifier must identify the disks in each stripe, IN
-     * THE ORDER THAT THEY APPEAR IN THE STRIPE. */
-    info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol),
-        (raidPtr->numCol),
-        raidPtr->cleanupList);
-    if (info->stripeIdentifier == NULL)
-        return (ENOMEM);
-
-    startdisk = 0;
-    for (i = 0; i < (raidPtr->numCol); i++) {
-        for (j = 0; j < (raidPtr->numCol); j++) {
-            info->stripeIdentifier[i][j] = (startdisk + j) %
-                (raidPtr->numCol - 1);
-        }
-        if ((--startdisk) < 0)
-            startdisk = raidPtr->numCol - 1 - 1;
-    }
-
-    /* fill in the remaining layout parameters */
-    layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
-    layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
-        raidPtr->logBytesPerSector;
-    layoutPtr->numParityCol = 1;
-    layoutPtr->numParityLogCol = 1;
-    layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol -
-        layoutPtr->numParityLogCol;
-    layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol *
-        layoutPtr->sectorsPerStripeUnit;
-    layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
-    raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
-        layoutPtr->sectorsPerStripeUnit;
-
-    raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
-        layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
-
-    /* configure parity log parameters
-     *
-     * parameter               comment/constraints
-     * -------------------------------------------
-     * numParityRegions*       all regions (except possibly last)
-     *                         of equal size
-     * totalInCoreLogCapacity* amount of memory in bytes available
-     *                         for in-core logs (default 1 MB)
-     * numSectorsPerLog#       capacity of an in-core log in sectors
-     *                         (1 * disk track)
-     * numParityLogs           total number of in-core logs,
-     *                         should be at least numParityRegions
-     * regionLogCapacity       size of a region log (except possibly
-     *                         last one) in sectors
-     * totalLogCapacity        total amount of log space in sectors
-     *
-     * where '*' denotes a user settable parameter.
-     * Note that logs are fixed to be the size of a disk track,
-     * value #defined in rf_paritylog.h
-     *
-     */
-
-    totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol;
-    raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions;
-    if (rf_parityLogDebug)
-        printf("bytes per sector %d\n", raidPtr->bytesPerSector);
-
-    /* reduce fragmentation within a disk region by adjusting the number
-     * of regions in an attempt to allow an integral number of logs to fit
-     * into a disk region */
-    fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog;
-    if (fragmentation > 0)
-        for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) {
-            if (((totalLogCapacity / (rf_numParityRegions + i)) %
-                raidPtr->numSectorsPerLog) < fragmentation) {
-                rf_numParityRegions++;
-                raidPtr->regionLogCapacity = totalLogCapacity /
-                    rf_numParityRegions;
-                fragmentation = raidPtr->regionLogCapacity %
-                    raidPtr->numSectorsPerLog;
-            }
-            /* only probe a smaller region count while the divisor
-             * stays positive; otherwise this divides by zero (or by
-             * a negative count) once i reaches rf_numParityRegions */
-            if ((rf_numParityRegions - i) > 0 &&
-                ((totalLogCapacity / (rf_numParityRegions - i)) %
-                raidPtr->numSectorsPerLog) < fragmentation) {
-                rf_numParityRegions--;
-                raidPtr->regionLogCapacity = totalLogCapacity /
-                    rf_numParityRegions;
-                fragmentation = raidPtr->regionLogCapacity %
-                    raidPtr->numSectorsPerLog;
-            }
-        }
-    /* ensure integral number of logs per region */
-    raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity /
-        raidPtr->numSectorsPerLog) *
-        raidPtr->numSectorsPerLog;
-
-    raidPtr->numParityLogs = rf_totalInCoreLogCapacity /
-        (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog);
-    /* to avoid deadlock, must ensure that enough logs exist for each
-     * region to have one simultaneously */
-    if (raidPtr->numParityLogs < rf_numParityRegions)
-        raidPtr->numParityLogs = rf_numParityRegions;
-
-    /* create region information structs */
-    printf("Allocating %d bytes for in-core parity region info\n",
-        (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
-    RF_Malloc(raidPtr->regionInfo,
-        (rf_numParityRegions * sizeof(RF_RegionInfo_t)),
-        (RF_RegionInfo_t *));
-    if (raidPtr->regionInfo == NULL)
-        return (ENOMEM);
-
-    /* last region may not be full capacity */
-    lastRegionCapacity = raidPtr->regionLogCapacity;
-    while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity +
-        lastRegionCapacity > totalLogCapacity)
-        lastRegionCapacity = lastRegionCapacity -
-            raidPtr->numSectorsPerLog;
-
-    raidPtr->regionParityRange = raidPtr->sectorsPerDisk /
-        rf_numParityRegions;
-    maxRegionParityRange = raidPtr->regionParityRange;
-
-/* i can't remember why this line is in the code -wvcii 6/30/95 */
-/* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0)
-   regionParityRange++; */
-
-    /* build pool of unused parity logs */
-    printf("Allocating %d bytes for %d parity logs\n",
-        raidPtr->numParityLogs * raidPtr->numSectorsPerLog *
-        raidPtr->bytesPerSector,
-        raidPtr->numParityLogs);
-    RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
-        raidPtr->numSectorsPerLog * raidPtr->bytesPerSector,
-        (caddr_t));
-    if (raidPtr->parityLogBufferHeap == NULL)
-        return (ENOMEM);
-    lHeapPtr = raidPtr->parityLogBufferHeap;
-    rc = rf_mutex_init(&raidPtr->parityLogPool.mutex, "RF_PARITYLOGGING1");
-    if (rc) {
-        RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
-            raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
-        return (ENOMEM);
-    }
-    for (i = 0; i < raidPtr->numParityLogs; i++) {
-        if (i == 0) {
-            RF_Calloc(raidPtr->parityLogPool.parityLogs, 1,
-                sizeof(RF_ParityLog_t), (RF_ParityLog_t *));
-            if (raidPtr->parityLogPool.parityLogs == NULL) {
-                RF_Free(raidPtr->parityLogBufferHeap,
-                    raidPtr->numParityLogs *
-                    raidPtr->numSectorsPerLog *
-                    raidPtr->bytesPerSector);
-                return (ENOMEM);
-            }
-            l = raidPtr->parityLogPool.parityLogs;
-        } else {
-            RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t),
-                (RF_ParityLog_t *));
-            if (l->next == NULL) {
-                RF_Free(raidPtr->parityLogBufferHeap,
-                    raidPtr->numParityLogs *
-                    raidPtr->numSectorsPerLog *
-                    raidPtr->bytesPerSector);
-                /* unwind the logs allocated so far */
-                for (l = raidPtr->parityLogPool.parityLogs;
-                    l;
-                    l = next) {
-                    next = l->next;
-                    if (l->records)
-                        RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)));
-                    RF_Free(l, sizeof(RF_ParityLog_t));
-                }
-                return (ENOMEM);
-            }
-            l = l->next;
-        }
-        /* each log gets its own slice of the shared buffer heap */
-        l->bufPtr = lHeapPtr;
-        lHeapPtr += raidPtr->numSectorsPerLog *
-            raidPtr->bytesPerSector;
-        RF_Malloc(l->records, (raidPtr->numSectorsPerLog *
-            sizeof(RF_ParityLogRecord_t)),
-            (RF_ParityLogRecord_t *));
-        if (l->records == NULL) {
-            RF_Free(raidPtr->parityLogBufferHeap,
-                raidPtr->numParityLogs *
-                raidPtr->numSectorsPerLog *
-                raidPtr->bytesPerSector);
-            for (l = raidPtr->parityLogPool.parityLogs;
-                l;
-                l = next) {
-                next = l->next;
-                if (l->records)
-                    RF_Free(l->records,
-                        (raidPtr->numSectorsPerLog *
-                        sizeof(RF_ParityLogRecord_t)));
-                RF_Free(l, sizeof(RF_ParityLog_t));
-            }
-            return (ENOMEM);
-        }
-    }
-    rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr);
-    if (rc) {
-        RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
-            __LINE__, rc);
-        rf_ShutdownParityLoggingPool(raidPtr);
-        return (rc);
-    }
-    /* build pool of region buffers */
-    rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex, "RF_PARITYLOGGING3");
-    if (rc) {
-        RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        return (ENOMEM);
-    }
-    rc = rf_cond_init(&raidPtr->regionBufferPool.cond);
-    if (rc) {
-        RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
-        return (ENOMEM);
-    }
-    raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity *
-        raidPtr->bytesPerSector;
-    printf("regionBufferPool.bufferSize %d\n",
-        raidPtr->regionBufferPool.bufferSize);
-
-    /* for now, only one region at a time may be reintegrated */
-    raidPtr->regionBufferPool.totalBuffers = 1;
-
-    raidPtr->regionBufferPool.availableBuffers =
-        raidPtr->regionBufferPool.totalBuffers;
-    raidPtr->regionBufferPool.availBuffersIndex = 0;
-    raidPtr->regionBufferPool.emptyBuffersIndex = 0;
-    printf("Allocating %d bytes for regionBufferPool\n",
-        (int) (raidPtr->regionBufferPool.totalBuffers *
-        sizeof(caddr_t)));
-    RF_Malloc(raidPtr->regionBufferPool.buffers,
-        raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t),
-        (caddr_t *));
-    if (raidPtr->regionBufferPool.buffers == NULL) {
-        rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
-        rf_cond_destroy(&raidPtr->regionBufferPool.cond);
-        return (ENOMEM);
-    }
-    for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) {
-        printf("Allocating %d bytes for regionBufferPool#%d\n",
-            (int) (raidPtr->regionBufferPool.bufferSize *
-            sizeof(char)), i);
-        RF_Malloc(raidPtr->regionBufferPool.buffers[i],
-            raidPtr->regionBufferPool.bufferSize * sizeof(char),
-            (caddr_t));
-        if (raidPtr->regionBufferPool.buffers[i] == NULL) {
-            rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
-            rf_cond_destroy(&raidPtr->regionBufferPool.cond);
-            /* free the buffers that WERE allocated: slots 0..i-1 */
-            for (j = 0; j < i; j++) {
-                RF_Free(raidPtr->regionBufferPool.buffers[j],
-                    raidPtr->regionBufferPool.bufferSize *
-                    sizeof(char));
-            }
-            RF_Free(raidPtr->regionBufferPool.buffers,
-                raidPtr->regionBufferPool.totalBuffers *
-                sizeof(caddr_t));
-            return (ENOMEM);
-        }
-        printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i,
-            (long) raidPtr->regionBufferPool.buffers[i]);
-    }
-    rc = rf_ShutdownCreate(listp,
-        rf_ShutdownParityLoggingRegionBufferPool,
-        raidPtr);
-    if (rc) {
-        RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
-            __LINE__, rc);
-        rf_ShutdownParityLoggingRegionBufferPool(raidPtr);
-        return (rc);
-    }
-    /* build pool of parity buffers */
-    parityBufferCapacity = maxRegionParityRange;
-    rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex, "RF_PARITYLOGGING3");
-    if (rc) {
-        RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        return (rc);
-    }
-    rc = rf_cond_init(&raidPtr->parityBufferPool.cond);
-    if (rc) {
-        RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
-        return (ENOMEM);
-    }
-    raidPtr->parityBufferPool.bufferSize = parityBufferCapacity *
-        raidPtr->bytesPerSector;
-    printf("parityBufferPool.bufferSize %d\n",
-        raidPtr->parityBufferPool.bufferSize);
-
-    /* for now, only one region at a time may be reintegrated */
-    raidPtr->parityBufferPool.totalBuffers = 1;
-
-    raidPtr->parityBufferPool.availableBuffers =
-        raidPtr->parityBufferPool.totalBuffers;
-    raidPtr->parityBufferPool.availBuffersIndex = 0;
-    raidPtr->parityBufferPool.emptyBuffersIndex = 0;
-    printf("Allocating %d bytes for parityBufferPool of %d units\n",
-        (int) (raidPtr->parityBufferPool.totalBuffers *
-        sizeof(caddr_t)),
-        raidPtr->parityBufferPool.totalBuffers );
-    RF_Malloc(raidPtr->parityBufferPool.buffers,
-        raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t),
-        (caddr_t *));
-    if (raidPtr->parityBufferPool.buffers == NULL) {
-        rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
-        rf_cond_destroy(&raidPtr->parityBufferPool.cond);
-        return (ENOMEM);
-    }
-    for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) {
-        printf("Allocating %d bytes for parityBufferPool#%d\n",
-            (int) (raidPtr->parityBufferPool.bufferSize *
-            sizeof(char)),i);
-        RF_Malloc(raidPtr->parityBufferPool.buffers[i],
-            raidPtr->parityBufferPool.bufferSize * sizeof(char),
-            (caddr_t));
-        /* test the buffer just allocated, not the (already checked)
-         * array that holds it */
-        if (raidPtr->parityBufferPool.buffers[i] == NULL) {
-            rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
-            rf_cond_destroy(&raidPtr->parityBufferPool.cond);
-            /* release slots 0..i-1 using this pool's own sizes */
-            for (j = 0; j < i; j++) {
-                RF_Free(raidPtr->parityBufferPool.buffers[j],
-                    raidPtr->parityBufferPool.bufferSize *
-                    sizeof(char));
-            }
-            RF_Free(raidPtr->parityBufferPool.buffers,
-                raidPtr->parityBufferPool.totalBuffers *
-                sizeof(caddr_t));
-            return (ENOMEM);
-        }
-        printf("parityBufferPool.buffers[%d] = %lx\n", i,
-            (long) raidPtr->parityBufferPool.buffers[i]);
-    }
-    rc = rf_ShutdownCreate(listp,
-        rf_ShutdownParityLoggingParityBufferPool,
-        raidPtr);
-    if (rc) {
-        RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
-            __LINE__, rc);
-        rf_ShutdownParityLoggingParityBufferPool(raidPtr);
-        return (rc);
-    }
-    /* initialize parityLogDiskQueue */
-    rc = rf_create_managed_mutex(listp,
-        &raidPtr->parityLogDiskQueue.mutex);
-    if (rc) {
-        RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        return (rc);
-    }
-    rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond);
-    if (rc) {
-        RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        return (rc);
-    }
-    raidPtr->parityLogDiskQueue.flushQueue = NULL;
-    raidPtr->parityLogDiskQueue.reintQueue = NULL;
-    raidPtr->parityLogDiskQueue.bufHead = NULL;
-    raidPtr->parityLogDiskQueue.bufTail = NULL;
-    raidPtr->parityLogDiskQueue.reintHead = NULL;
-    raidPtr->parityLogDiskQueue.reintTail = NULL;
-    raidPtr->parityLogDiskQueue.logBlockHead = NULL;
-    raidPtr->parityLogDiskQueue.logBlockTail = NULL;
-    raidPtr->parityLogDiskQueue.reintBlockHead = NULL;
-    raidPtr->parityLogDiskQueue.reintBlockTail = NULL;
-    raidPtr->parityLogDiskQueue.freeDataList = NULL;
-    raidPtr->parityLogDiskQueue.freeCommonList = NULL;
-
-    rc = rf_ShutdownCreate(listp,
-        rf_ShutdownParityLoggingDiskQueue,
-        raidPtr);
-    if (rc) {
-        RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
-            __LINE__, rc);
-        return (rc);
-    }
-    for (i = 0; i < rf_numParityRegions; i++) {
-        rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex, "RF_PARITYLOGGING3");
-        if (rc) {
-            RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
-                __LINE__, rc);
-            for (j = 0; j < i; j++)
-                FreeRegionInfo(raidPtr, j);
-            RF_Free(raidPtr->regionInfo,
-                (rf_numParityRegions *
-                sizeof(RF_RegionInfo_t)));
-            return (ENOMEM);
-        }
-        rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex, "RF_PARITYLOGGING4");
-        if (rc) {
-            RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
-                __LINE__, rc);
-            rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
-            for (j = 0; j < i; j++)
-                FreeRegionInfo(raidPtr, j);
-            RF_Free(raidPtr->regionInfo,
-                (rf_numParityRegions *
-                sizeof(RF_RegionInfo_t)));
-            return (ENOMEM);
-        }
-        raidPtr->regionInfo[i].reintInProgress = RF_FALSE;
-        raidPtr->regionInfo[i].regionStartAddr =
-            raidPtr->regionLogCapacity * i;
-        raidPtr->regionInfo[i].parityStartAddr =
-            raidPtr->regionParityRange * i;
-        if (i < rf_numParityRegions - 1) {
-            raidPtr->regionInfo[i].capacity =
-                raidPtr->regionLogCapacity;
-            raidPtr->regionInfo[i].numSectorsParity =
-                raidPtr->regionParityRange;
-        } else {
-            /* the last region absorbs whatever is left over */
-            raidPtr->regionInfo[i].capacity =
-                lastRegionCapacity;
-            raidPtr->regionInfo[i].numSectorsParity =
-                raidPtr->sectorsPerDisk -
-                raidPtr->regionParityRange * i;
-            if (raidPtr->regionInfo[i].numSectorsParity >
-                maxRegionParityRange)
-                maxRegionParityRange =
-                    raidPtr->regionInfo[i].numSectorsParity;
-        }
-        raidPtr->regionInfo[i].diskCount = 0;
-        RF_ASSERT(raidPtr->regionInfo[i].capacity +
-            raidPtr->regionInfo[i].regionStartAddr <=
-            totalLogCapacity);
-        RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr +
-            raidPtr->regionInfo[i].numSectorsParity <=
-            raidPtr->sectorsPerDisk);
-        printf("Allocating %d bytes for region %d\n",
-            (int) (raidPtr->regionInfo[i].capacity *
-            sizeof(RF_DiskMap_t)), i);
-        RF_Malloc(raidPtr->regionInfo[i].diskMap,
-            (raidPtr->regionInfo[i].capacity *
-            sizeof(RF_DiskMap_t)),
-            (RF_DiskMap_t *));
-        if (raidPtr->regionInfo[i].diskMap == NULL) {
-            rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
-            rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex);
-            for (j = 0; j < i; j++)
-                FreeRegionInfo(raidPtr, j);
-            RF_Free(raidPtr->regionInfo,
-                (rf_numParityRegions *
-                sizeof(RF_RegionInfo_t)));
-            return (ENOMEM);
-        }
-        raidPtr->regionInfo[i].loggingEnabled = RF_FALSE;
-        raidPtr->regionInfo[i].coreLog = NULL;
-    }
-    rc = rf_ShutdownCreate(listp,
-        rf_ShutdownParityLoggingRegionInfo,
-        raidPtr);
-    if (rc) {
-        RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
-            __LINE__, rc);
-        rf_ShutdownParityLoggingRegionInfo(raidPtr);
-        return (rc);
-    }
-    RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0);
-    raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED;
-    rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle,
-        rf_ParityLoggingDiskManager, raidPtr,"rf_log");
-    if (rc) {
-        raidPtr->parityLogDiskQueue.threadState = 0;
-        RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n",
-            __FILE__, __LINE__, rc);
-        return (ENOMEM);
-    }
-    /* wait for thread to start */
-    RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-    while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) {
-        RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
-            raidPtr->parityLogDiskQueue.mutex);
-    }
-    RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
-
-    rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr);
-    if (rc) {
-        RF_ERRORMSG1("Got rc=%d adding parity logging shutdown event\n", rc);
-        rf_ShutdownParityLogging(raidPtr);
-        return (rc);
-    }
-    if (rf_parityLogDebug) {
-        printf(" size of disk log in sectors: %d\n",
-            (int) totalLogCapacity);
-        printf(" total number of parity regions is %d\n", (int) rf_numParityRegions);
-        printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity);
-        printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation);
-        printf(" total number of parity logs is %d\n", raidPtr->numParityLogs);
-        printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog);
-        printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity);
-    }
-    rf_EnableParityLogging(raidPtr);
-
-    return (0);
-}
-
-/*
- * Tear down one entry of raidPtr->regionInfo: free its disk map, release
- * its in-core log when reintegration was not forced (otherwise assert that
- * no log and no on-disk records remain), and destroy both region mutexes.
- */
-static void
-FreeRegionInfo(
-    RF_Raid_t * raidPtr,
-    RF_RegionId_t regionID)
-{
-    RF_RegionInfo_t *region = &raidPtr->regionInfo[regionID];
-
-    RF_LOCK_MUTEX(region->mutex);
-    RF_Free(region->diskMap, (region->capacity * sizeof(RF_DiskMap_t)));
-    if (!rf_forceParityLogReint && region->coreLog) {
-        rf_ReleaseParityLogs(raidPtr, region->coreLog);
-        region->coreLog = NULL;
-    } else {
-        /* assertion expressions kept verbatim */
-        RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL);
-        RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0);
-    }
-    RF_UNLOCK_MUTEX(region->mutex);
-
-    rf_mutex_destroy(&region->mutex);
-    rf_mutex_destroy(&region->reintMutex);
-}
-
-
-/*
- * Free every parity log on queue->parityLogs (its record array first, then
- * the log itself) while holding the queue mutex, then destroy the mutex.
- */
-static void
-FreeParityLogQueue(
-    RF_Raid_t * raidPtr,
-    RF_ParityLogQueue_t * queue)
-{
-    RF_ParityLog_t *cur, *following;
-
-    RF_LOCK_MUTEX(queue->mutex);
-    for (cur = queue->parityLogs; cur != NULL; cur = following) {
-        /* read the link before the node is freed */
-        following = cur->next;
-        RF_Free(cur->records,
-            (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)));
-        RF_Free(cur, sizeof(RF_ParityLog_t));
-    }
-    RF_UNLOCK_MUTEX(queue->mutex);
-    rf_mutex_destroy(&queue->mutex);
-}
-
-
-/*
- * Release a region/parity buffer queue: free each buffer and the buffer
- * array under the queue mutex, then destroy the queue's mutex and
- * condition variable.
- *
- * Complains (printf + RF_ASSERT(0)) if some buffers are still checked out,
- * i.e. availableBuffers != totalBuffers.
- *
- * The condition variable is created with rf_cond_init next to the mutex in
- * rf_ConfigureParityLogging, and the config error paths destroy both, so
- * the normal shutdown path destroys both as well.
- */
-static void
-FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue)
-{
-    int i;
-
-    RF_LOCK_MUTEX(queue->mutex);
-    if (queue->availableBuffers != queue->totalBuffers) {
-        printf("Attempt to free region queue which is still in use!\n");
-        RF_ASSERT(0);
-    }
-    for (i = 0; i < queue->totalBuffers; i++)
-        RF_Free(queue->buffers[i], queue->bufferSize);
-    RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t));
-    RF_UNLOCK_MUTEX(queue->mutex);
-    rf_mutex_destroy(&queue->mutex);
-    rf_cond_destroy(&queue->cond);
-}
-
-/*
- * Shutdown-list callback: free every region's info with FreeRegionInfo,
- * then free the regionInfo array itself and clear the pointer.
- *
- * arg - the RF_Raid_t * registered with rf_ShutdownCreate
- */
-static void
-rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg)
-{
-    RF_Raid_t *raidPtr;
-    RF_RegionId_t i;
-
-    raidPtr = (RF_Raid_t *) arg;
-    if (rf_parityLogDebug) {
-        printf("raid%d: ShutdownParityLoggingRegionInfo\n",
-            raidPtr->raidid);
-    }
-    /* free region information structs */
-    for (i = 0; i < rf_numParityRegions; i++)
-        FreeRegionInfo(raidPtr, i);
-    /* size must match the allocation: an array of RF_RegionInfo_t, not an
-     * array of pointers (sizeof(raidPtr->regionInfo) is the pointer size) */
-    RF_Free(raidPtr->regionInfo, (rf_numParityRegions *
-        sizeof(RF_RegionInfo_t)));
-    raidPtr->regionInfo = NULL;
-}
-
-/*
- * Shutdown-list callback: free the pool of in-core parity logs and the
- * buffer heap backing them.
- */
-static void
-rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg)
-{
-    RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
-
-    if (rf_parityLogDebug)
-        printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid);
-
-    /* the log descriptors go first, then the heap they point into */
-    FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool);
-    RF_Free(raidPtr->parityLogBufferHeap,
-        raidPtr->numParityLogs * raidPtr->numSectorsPerLog *
-        raidPtr->bytesPerSector);
-}
-
-/*
- * Shutdown-list callback: release the region buffer pool.
- */
-static void
-rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg)
-{
-    RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
-
-    if (rf_parityLogDebug)
-        printf("raid%d: ShutdownParityLoggingRegionBufferPool\n",
-            raidPtr->raidid);
-
-    FreeRegionBufferQueue(&raidPtr->regionBufferPool);
-}
-
-/*
- * Shutdown-list callback: release the parity buffer pool.
- */
-static void
-rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg)
-{
-    RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
-
-    if (rf_parityLogDebug)
-        printf("raid%d: ShutdownParityLoggingParityBufferPool\n",
-            raidPtr->raidid);
-
-    FreeRegionBufferQueue(&raidPtr->parityBufferPool);
-}
-
-/*
- * Shutdown-list callback: after asserting that the buffer and
- * reintegration queues are empty, free every element of the disk queue's
- * free lists (log data items, then common log data items, whose mutex is
- * destroyed before the item is freed).
- */
-static void
-rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg)
-{
-    RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
-    RF_ParityLogData_t *d;
-    RF_CommonLogData_t *c;
-
-    if (rf_parityLogDebug)
-        printf("raid%d: ShutdownParityLoggingDiskQueue\n",
-            raidPtr->raidid);
-
-    /* free disk manager stuff */
-    RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL);
-    RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL);
-    RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL);
-    RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL);
-
-    while ((d = raidPtr->parityLogDiskQueue.freeDataList) != NULL) {
-        raidPtr->parityLogDiskQueue.freeDataList = d->next;
-        RF_Free(d, sizeof(RF_ParityLogData_t));
-    }
-
-    while ((c = raidPtr->parityLogDiskQueue.freeCommonList) != NULL) {
-        rf_mutex_destroy(&c->mutex);
-        raidPtr->parityLogDiskQueue.freeCommonList = c->next;
-        RF_Free(c, sizeof(RF_CommonLogData_t));
-    }
-}
-
-static void
-rf_ShutdownParityLogging(RF_ThreadArg_t arg)
-{
- RF_Raid_t *raidPtr;
-
- raidPtr = (RF_Raid_t *) arg;
- if (rf_parityLogDebug) {
- printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid);
- }
- /* shutdown disk thread */
- /* This has the desirable side-effect of forcing all regions to be
- * reintegrated. This is necessary since all parity log maps are
- * currently held in volatile memory. */
-
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE;
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
- /*
- * pLogDiskThread will now terminate when queues are cleared
- * now wait for it to be done
- */
- RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) {
- RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
- raidPtr->parityLogDiskQueue.mutex);
- }
- RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
- if (rf_parityLogDebug) {
- printf("raid%d: ShutdownParityLogging done (thread completed)\n", raidPtr->raidid);
- }
-}
-
-int
-rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr)
-{
- return (20);
-}
-
-RF_HeadSepLimit_t
-rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr)
-{
- return (10);
-}
-/* return the region ID for a given RAID address */
-RF_RegionId_t
-rf_MapRegionIDParityLogging(
- RF_Raid_t * raidPtr,
- RF_SectorNum_t address)
-{
- RF_RegionId_t regionID;
-
-/* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */
- regionID = address / raidPtr->regionParityRange;
- if (regionID == rf_numParityRegions) {
- /* last region may be larger than other regions */
- regionID--;
- }
- RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr);
- RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr +
- raidPtr->regionInfo[regionID].numSectorsParity);
- RF_ASSERT(regionID < rf_numParityRegions);
- return (regionID);
-}
-
-
-/* given a logical RAID sector, determine physical disk address of data */
-void
-rf_MapSectorParityLogging(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector /
- raidPtr->Layout.sectorsPerStripeUnit;
- *row = 0;
- /* *col = (SUID % (raidPtr->numCol -
- * raidPtr->Layout.numParityLogCol)); */
- *col = SUID % raidPtr->Layout.numDataCol;
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
- raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-
-/* given a logical RAID sector, determine physical disk address of parity */
-void
-rf_MapParityParityLogging(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector /
- raidPtr->Layout.sectorsPerStripeUnit;
-
- *row = 0;
- /* *col =
- * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt
- * r->numCol - raidPtr->Layout.numParityLogCol); */
- *col = raidPtr->Layout.numDataCol;
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
- raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-
-/* given a regionID and sector offset, determine the physical disk address of the parity log */
-void
-rf_MapLogParityLogging(
- RF_Raid_t * raidPtr,
- RF_RegionId_t regionID,
- RF_SectorNum_t regionOffset,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * startSector)
-{
- *row = 0;
- *col = raidPtr->numCol - 1;
- *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset;
-}
-
-
-/* given a regionID, determine the physical disk address of the logged
- parity for that region */
-void
-rf_MapRegionParity(
- RF_Raid_t * raidPtr,
- RF_RegionId_t regionID,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * startSector,
- RF_SectorCount_t * numSector)
-{
- *row = 0;
- *col = raidPtr->numCol - 2;
- *startSector = raidPtr->regionInfo[regionID].parityStartAddr;
- *numSector = raidPtr->regionInfo[regionID].numSectorsParity;
-}
-
-
-/* given a logical RAID address, determine the participating disks in
- the stripe */
-void
-rf_IdentifyStripeParityLogging(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout,
- addr);
- RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *)
- raidPtr->Layout.layoutSpecificInfo;
- *outRow = 0;
- *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
-}
-
-
-void
-rf_MapSIDToPSIDParityLogging(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
-
-
-/* select an algorithm for performing an access. Returns two pointers,
- * one to a function that will return information about the DAG, and
- * another to a function that will create the dag.
- */
-void
-rf_ParityLoggingDagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmp,
- RF_VoidFuncPtr * createFunc)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_PhysDiskAddr_t *failedPDA = NULL;
- RF_RowCol_t frow, fcol;
- RF_RowStatus_t rstat;
- int prior_recon;
-
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
-
- if (asmp->numDataFailed + asmp->numParityFailed > 1) {
- RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
- /* *infoFunc = */ *createFunc = NULL;
- return;
- } else
- if (asmp->numDataFailed + asmp->numParityFailed == 1) {
-
- /* if under recon & already reconstructed, redirect
- * the access to the spare drive and eliminate the
- * failure indication */
- failedPDA = asmp->failedPDAs[0];
- frow = failedPDA->row;
- fcol = failedPDA->col;
- rstat = raidPtr->status[failedPDA->row];
- prior_recon = (rstat == rf_rs_reconfigured) || (
- (rstat == rf_rs_reconstructing) ?
- rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
- );
- if (prior_recon) {
- RF_RowCol_t or = failedPDA->row, oc = failedPDA->col;
- RF_SectorNum_t oo = failedPDA->startSector;
- if (layoutPtr->map->flags &
- RF_DISTRIBUTE_SPARE) {
- /* redirect to dist spare space */
-
- if (failedPDA == asmp->parityInfo) {
-
- /* parity has failed */
- (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row,
- &failedPDA->col, &failedPDA->startSector, RF_REMAP);
-
- if (asmp->parityInfo->next) { /* redir 2nd component,
- * if any */
- RF_PhysDiskAddr_t *p = asmp->parityInfo->next;
- RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit;
- p->row = failedPDA->row;
- p->col = failedPDA->col;
- p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) +
- SUoffs; /* cheating:
- * startSector is not
- * really a RAID address */
- }
- } else
- if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) {
- RF_ASSERT(0); /* should not ever
- * happen */
- } else {
-
- /* data has failed */
- (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row,
- &failedPDA->col, &failedPDA->startSector, RF_REMAP);
-
- }
-
- } else {
- /* redirect to dedicated spare space */
-
- failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
- failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
-
- /* the parity may have two distinct
- * components, both of which may need
- * to be redirected */
- if (asmp->parityInfo->next) {
- if (failedPDA == asmp->parityInfo) {
- failedPDA->next->row = failedPDA->row;
- failedPDA->next->col = failedPDA->col;
- } else
- if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */
- asmp->parityInfo->row = failedPDA->row;
- asmp->parityInfo->col = failedPDA->col;
- }
- }
- }
-
- RF_ASSERT(failedPDA->col != -1);
-
- if (rf_dagDebug || rf_mapDebug) {
- printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
- raidPtr->raidid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, (long) failedPDA->startSector);
- }
- asmp->numDataFailed = asmp->numParityFailed = 0;
- }
- }
- if (type == RF_IO_TYPE_READ) {
-
- if (asmp->numDataFailed == 0)
- *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG;
-
- } else {
-
-
- /* if mirroring, always use large writes. If the access
- * requires two distinct parity updates, always do a small
- * write. If the stripe contains a failure but the access
- * does not, do a small write. The first conditional
- * (numStripeUnitsAccessed <= numDataCol/2) uses a
- * less-than-or-equal rather than just a less-than because
- * when G is 3 or 4, numDataCol/2 is 1, and I want
- * single-stripe-unit updates to use just one disk. */
- if ((asmp->numDataFailed + asmp->numParityFailed) == 0) {
- if (((asmp->numStripeUnitsAccessed <=
- (layoutPtr->numDataCol / 2)) &&
- (layoutPtr->numDataCol != 1)) ||
- (asmp->parityInfo->next != NULL) ||
- rf_CheckStripeForFailures(raidPtr, asmp)) {
- *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG;
- } else
- *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG;
- } else
- if (asmp->numParityFailed == 1)
- *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG;
- else
- if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)
- *createFunc = NULL;
- else
- *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG;
- }
-}
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
diff --git a/sys/dev/raidframe/rf_paritylogging.h b/sys/dev/raidframe/rf_paritylogging.h
deleted file mode 100644
index 5b7dd25..0000000
--- a/sys/dev/raidframe/rf_paritylogging.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_paritylogging.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* header file for Parity Logging */
-
-#ifndef _RF__RF_PARITYLOGGING_H_
-#define _RF__RF_PARITYLOGGING_H_
-
-int
-rf_ConfigureParityLogging(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr);
-RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr);
-RF_RegionId_t
-rf_MapRegionIDParityLogging(RF_Raid_t * raidPtr,
- RF_SectorNum_t address);
-void
-rf_MapSectorParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector,
- int remap);
-void
-rf_MapParityParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector,
- int remap);
-void
-rf_MapLogParityLogging(RF_Raid_t * raidPtr, RF_RegionId_t regionID,
- RF_SectorNum_t regionOffset, RF_RowCol_t * row, RF_RowCol_t * col,
- RF_SectorNum_t * startSector);
-void
-rf_MapRegionParity(RF_Raid_t * raidPtr, RF_RegionId_t regionID,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * startSector,
- RF_SectorCount_t * numSector);
-void
-rf_IdentifyStripeParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-void
-rf_ParityLoggingDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc);
-
-#endif /* !_RF__RF_PARITYLOGGING_H_ */
diff --git a/sys/dev/raidframe/rf_parityloggingdags.c b/sys/dev/raidframe/rf_parityloggingdags.c
deleted file mode 100644
index 30a5892..0000000
--- a/sys/dev/raidframe/rf_parityloggingdags.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/* $NetBSD: rf_parityloggingdags.c,v 1.4 2000/01/07 03:41:04 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-
-/*
- DAGs specific to parity logging are created here
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_paritylog.h>
-#include <dev/raidframe/rf_memchunk.h>
-#include <dev/raidframe/rf_general.h>
-
-#include <dev/raidframe/rf_parityloggingdags.h>
-
-/******************************************************************************
- *
- * creates a DAG to perform a large-write operation:
- *
- * / Rod \ / Wnd \
- * H -- NIL- Rod - NIL - Wnd ------ NIL - T
- * \ Rod / \ Xor - Lpo /
- *
- * The writes are not done until the reads complete because if they were done in
- * parallel, a failure on one of the reads could leave the parity in an inconsistent
- * state, so that the retry with a new DAG would produce erroneous parity.
- *
- * Note: this DAG has the nasty property that none of the buffers allocated for reading
- * old data can be freed until the XOR node fires. Need to fix this.
- *
- * The last two arguments are the number of faults tolerated, and function for the
- * redundancy calculation. The undo for the redundancy calc is assumed to be null
- *
- *****************************************************************************/
-
-void
-rf_CommonCreateParityLoggingLargeWriteDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList,
- int nfaults,
- int (*redFunc) (RF_DagNode_t *))
-{
- RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode,
- *lpoNode, *blockNode, *unblockNode, *termNode;
- int nWndNodes, nRodNodes, i;
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_AccessStripeMapHeader_t *new_asm_h[2];
- int nodeNum, asmNum;
- RF_ReconUnitNum_t which_ru;
- char *sosBuffer, *eosBuffer;
- RF_PhysDiskAddr_t *pda;
- RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
-
- if (rf_dagDebug)
- printf("[Creating parity-logging large-write DAG]\n");
- RF_ASSERT(nfaults == 1);/* this arch only single fault tolerant */
- dag_h->creator = "ParityLoggingLargeWriteDAG";
-
- /* alloc the Wnd nodes, the xor node, and the Lpo node */
- nWndNodes = asmap->numStripeUnitsAccessed;
- RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
- i = 0;
- wndNodes = &nodes[i];
- i += nWndNodes;
- xorNode = &nodes[i];
- i += 1;
- lpoNode = &nodes[i];
- i += 1;
- blockNode = &nodes[i];
- i += 1;
- syncNode = &nodes[i];
- i += 1;
- unblockNode = &nodes[i];
- i += 1;
- termNode = &nodes[i];
- i += 1;
-
- dag_h->numCommitNodes = nWndNodes + 1;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList);
- if (nRodNodes > 0)
- RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
-
- /* begin node initialization */
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList);
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-
- /* initialize the Rod nodes */
- for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
- if (new_asm_h[asmNum]) {
- pda = new_asm_h[asmNum]->stripeMap->physInfo;
- while (pda) {
- rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList);
- rodNodes[nodeNum].params[0].p = pda;
- rodNodes[nodeNum].params[1].p = pda->bufPtr;
- rodNodes[nodeNum].params[2].v = parityStripeID;
- rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- nodeNum++;
- pda = pda->next;
- }
- }
- }
- RF_ASSERT(nodeNum == nRodNodes);
-
- /* initialize the wnd nodes */
- pda = asmap->physInfo;
- for (i = 0; i < nWndNodes; i++) {
- rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
- RF_ASSERT(pda != NULL);
- wndNodes[i].params[0].p = pda;
- wndNodes[i].params[1].p = pda->bufPtr;
- wndNodes[i].params[2].v = parityStripeID;
- wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- pda = pda->next;
- }
-
- /* initialize the redundancy node */
- rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h, "Xr ", allocList);
- xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
- for (i = 0; i < nWndNodes; i++) {
- xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */
- xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */
- }
- for (i = 0; i < nRodNodes; i++) {
- xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */
- xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */
- }
- xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get
- * at RAID information */
-
- /* look for an Rod node that reads a complete SU. If none, alloc a
- * buffer to receive the parity info. Note that we can't use a new
- * data buffer because it will not have gotten written when the xor
- * occurs. */
- for (i = 0; i < nRodNodes; i++)
- if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
- break;
- if (i == nRodNodes) {
- RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList);
- } else {
- xorNode->results[0] = rodNodes[i].params[1].p;
- }
-
- /* initialize the Lpo node */
- rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList);
-
- lpoNode->params[0].p = asmap->parityInfo;
- lpoNode->params[1].p = xorNode->results[0];
- RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must
- * describe entire
- * parity unit */
-
- /* connect nodes to form graph */
-
- /* connect dag header to block node */
- RF_ASSERT(dag_h->numSuccedents == 1);
- RF_ASSERT(blockNode->numAntecedents == 0);
- dag_h->succedents[0] = blockNode;
-
- /* connect the block node to the Rod nodes */
- RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1);
- for (i = 0; i < nRodNodes; i++) {
- RF_ASSERT(rodNodes[i].numAntecedents == 1);
- blockNode->succedents[i] = &rodNodes[i];
- rodNodes[i].antecedents[0] = blockNode;
- rodNodes[i].antType[0] = rf_control;
- }
-
- /* connect the block node to the sync node */
- /* necessary if nRodNodes == 0 */
- RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1);
- blockNode->succedents[nRodNodes] = syncNode;
- syncNode->antecedents[0] = blockNode;
- syncNode->antType[0] = rf_control;
-
- /* connect the Rod nodes to the syncNode */
- for (i = 0; i < nRodNodes; i++) {
- rodNodes[i].succedents[0] = syncNode;
- syncNode->antecedents[1 + i] = &rodNodes[i];
- syncNode->antType[1 + i] = rf_control;
- }
-
- /* connect the sync node to the xor node */
- RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1);
- RF_ASSERT(xorNode->numAntecedents == 1);
- syncNode->succedents[0] = xorNode;
- xorNode->antecedents[0] = syncNode;
- xorNode->antType[0] = rf_trueData; /* carry forward from sync */
-
- /* connect the sync node to the Wnd nodes */
- for (i = 0; i < nWndNodes; i++) {
- RF_ASSERT(wndNodes->numAntecedents == 1);
- syncNode->succedents[1 + i] = &wndNodes[i];
- wndNodes[i].antecedents[0] = syncNode;
- wndNodes[i].antType[0] = rf_control;
- }
-
- /* connect the xor node to the Lpo node */
- RF_ASSERT(xorNode->numSuccedents == 1);
- RF_ASSERT(lpoNode->numAntecedents == 1);
- xorNode->succedents[0] = lpoNode;
- lpoNode->antecedents[0] = xorNode;
- lpoNode->antType[0] = rf_trueData;
-
- /* connect the Wnd nodes to the unblock node */
- RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1);
- for (i = 0; i < nWndNodes; i++) {
- RF_ASSERT(wndNodes->numSuccedents == 1);
- wndNodes[i].succedents[0] = unblockNode;
- unblockNode->antecedents[i] = &wndNodes[i];
- unblockNode->antType[i] = rf_control;
- }
-
- /* connect the Lpo node to the unblock node */
- RF_ASSERT(lpoNode->numSuccedents == 1);
- lpoNode->succedents[0] = unblockNode;
- unblockNode->antecedents[nWndNodes] = lpoNode;
- unblockNode->antType[nWndNodes] = rf_control;
-
- /* connect unblock node to terminator */
- RF_ASSERT(unblockNode->numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == 1);
- RF_ASSERT(termNode->numSuccedents == 0);
- unblockNode->succedents[0] = termNode;
- termNode->antecedents[0] = unblockNode;
- termNode->antType[0] = rf_control;
-}
-
-
-
-
-/******************************************************************************
- *
- * creates a DAG to perform a small-write operation (either raid 5 or pq), which is as follows:
- *
- * Header
- * |
- * Block
- * / | ... \ \
- * / | \ \
- * Rod Rod Rod Rop
- * | \ /| \ / | \/ |
- * | | | /\ |
- * Wnd Wnd Wnd X
- * | \ / |
- * | \ / |
- * \ \ / Lpo
- * \ \ / /
- * +-> Unblock <-+
- * |
- * T
- *
- *
- * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity.
- * When the access spans a stripe unit boundary and is less than one SU in size, there will
- * be two Rop -- X -- Wnp branches. I call this the "double-XOR" case.
- * The second output from each Rod node goes to the X node. In the double-XOR
- * case, there are exactly 2 Rod nodes, and each sends one output to one X node.
- * There is one Rod -- Wnd -- T branch for each stripe unit being updated.
- *
- * The block and unblock nodes are unused. See comment above CreateFaultFreeReadDAG.
- *
- * Note: this DAG ignores all the optimizations related to making the RMWs atomic.
- * it also has the nasty property that none of the buffers allocated for reading
- * old data & parity can be freed until the XOR node fires. Need to fix this.
- *
- * A null qfuncs indicates single fault tolerant
- *****************************************************************************/
-
-void
-rf_CommonCreateParityLoggingSmallWriteDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs,
- RF_RedFuncs_t * qfuncs)
-{
- RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes;
- RF_DagNode_t *readDataNodes, *readParityNodes;
- RF_DagNode_t *writeDataNodes, *lpuNodes;
- RF_DagNode_t *unlockDataNodes = NULL, *termNode;
- RF_PhysDiskAddr_t *pda = asmap->physInfo;
- int numDataNodes = asmap->numStripeUnitsAccessed;
- int numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
- int i, j, nNodes, totalNumNodes;
- RF_ReconUnitNum_t which_ru;
- int (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node);
- int (*qfunc) (RF_DagNode_t * node);
- char *name, *qname;
- RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
- long nfaults = qfuncs ? 2 : 1;
- int lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */
-
- if (rf_dagDebug)
- printf("[Creating parity-logging small-write DAG]\n");
- RF_ASSERT(numDataNodes > 0);
- RF_ASSERT(nfaults == 1);
- dag_h->creator = "ParityLoggingSmallWriteDAG";
-
- /* DAG creation occurs in three steps: 1. count the number of nodes in
- * the DAG 2. create the nodes 3. initialize the nodes 4. connect the
- * nodes */
-
- /* Step 1. compute number of nodes in the graph */
-
- /* number of nodes: a read and write for each data unit a redundancy
- * computation node for each parity node a read and Lpu for each
- * parity unit a block and unblock node (2) a terminator node if
- * atomic RMW an unlock node for each data unit, redundancy unit */
- totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3;
- if (lu_flag)
- totalNumNodes += numDataNodes;
-
- nNodes = numDataNodes + numParityNodes;
-
- dag_h->numCommitNodes = numDataNodes + numParityNodes;
- dag_h->numCommits = 0;
- dag_h->numSuccedents = 1;
-
- /* Step 2. create the nodes */
- RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
- i = 0;
- blockNode = &nodes[i];
- i += 1;
- unblockNode = &nodes[i];
- i += 1;
- readDataNodes = &nodes[i];
- i += numDataNodes;
- readParityNodes = &nodes[i];
- i += numParityNodes;
- writeDataNodes = &nodes[i];
- i += numDataNodes;
- lpuNodes = &nodes[i];
- i += numParityNodes;
- xorNodes = &nodes[i];
- i += numParityNodes;
- termNode = &nodes[i];
- i += 1;
- if (lu_flag) {
- unlockDataNodes = &nodes[i];
- i += numDataNodes;
- }
- RF_ASSERT(i == totalNumNodes);
-
- /* Step 3. initialize the nodes */
- /* initialize block node (Nil) */
- rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList);
-
- /* initialize unblock node (Nil) */
- rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList);
-
- /* initialize terminatory node (Trm) */
- rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
-
- /* initialize nodes which read old data (Rod) */
- for (i = 0; i < numDataNodes; i++) {
- rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList);
- RF_ASSERT(pda != NULL);
- readDataNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old
- * data */
- readDataNodes[i].params[2].v = parityStripeID;
- readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru);
- pda = pda->next;
- readDataNodes[i].propList[0] = NULL;
- readDataNodes[i].propList[1] = NULL;
- }
-
- /* initialize nodes which read old parity (Rop) */
- pda = asmap->parityInfo;
- i = 0;
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList);
- readParityNodes[i].params[0].p = pda;
- readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old
- * parity */
- readParityNodes[i].params[2].v = parityStripeID;
- readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- readParityNodes[i].propList[0] = NULL;
- pda = pda->next;
- }
-
- /* initialize nodes which write new data (Wnd) */
- pda = asmap->physInfo;
- for (i = 0; i < numDataNodes; i++) {
- RF_ASSERT(pda != NULL);
- rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList);
- writeDataNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new
- * data to be written */
- writeDataNodes[i].params[2].v = parityStripeID;
- writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
-
- if (lu_flag) {
- /* initialize node to unlock the disk queue */
- rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList);
- unlockDataNodes[i].params[0].p = pda; /* physical disk addr
- * desc */
- unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru);
- }
- pda = pda->next;
- }
-
-
- /* initialize nodes which compute new parity */
- /* we use the simple XOR func in the double-XOR case, and when we're
- * accessing only a portion of one stripe unit. the distinction
- * between the two is that the regular XOR func assumes that the
- * targbuf is a full SU in size, and examines the pda associated with
- * the buffer to decide where within the buffer to XOR the data,
- * whereas the simple XOR func just XORs the data into the start of
- * the buffer. */
- if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) {
- func = pfuncs->simple;
- undoFunc = rf_NullNodeUndoFunc;
- name = pfuncs->SimpleName;
- if (qfuncs) {
- qfunc = qfuncs->simple;
- qname = qfuncs->SimpleName;
- }
- } else {
- func = pfuncs->regular;
- undoFunc = rf_NullNodeUndoFunc;
- name = pfuncs->RegularName;
- if (qfuncs) {
- qfunc = qfuncs->regular;
- qname = qfuncs->RegularName;
- }
- }
- /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop}
- * nodes, and raidPtr */
- if (numParityNodes == 2) { /* double-xor case */
- for (i = 0; i < numParityNodes; i++) {
- rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for
- * xor */
- xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
- xorNodes[i].params[0] = readDataNodes[i].params[0];
- xorNodes[i].params[1] = readDataNodes[i].params[1];
- xorNodes[i].params[2] = readParityNodes[i].params[0];
- xorNodes[i].params[3] = readParityNodes[i].params[1];
- xorNodes[i].params[4] = writeDataNodes[i].params[0];
- xorNodes[i].params[5] = writeDataNodes[i].params[1];
- xorNodes[i].params[6].p = raidPtr;
- xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as
- * target buf */
- }
- } else {
- /* there is only one xor node in this case */
- rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList);
- xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
- for (i = 0; i < numDataNodes + 1; i++) {
- /* set up params related to Rod and Rop nodes */
- xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */
- xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */
- }
- for (i = 0; i < numDataNodes; i++) {
- /* set up params related to Wnd and Wnp nodes */
- xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */
- xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */
- }
- xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get
- * at RAID information */
- xorNodes[0].results[0] = readParityNodes[0].params[1].p;
- }
-
- /* initialize the log node(s) */
- pda = asmap->parityInfo;
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(pda);
- rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList);
- lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity */
- lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to
- * parity */
- pda = pda->next;
- }
-
-
- /* Step 4. connect the nodes */
-
- /* connect header to block node */
- RF_ASSERT(dag_h->numSuccedents == 1);
- RF_ASSERT(blockNode->numAntecedents == 0);
- dag_h->succedents[0] = blockNode;
-
- /* connect block node to read old data nodes */
- RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes));
- for (i = 0; i < numDataNodes; i++) {
- blockNode->succedents[i] = &readDataNodes[i];
- RF_ASSERT(readDataNodes[i].numAntecedents == 1);
- readDataNodes[i].antecedents[0] = blockNode;
- readDataNodes[i].antType[0] = rf_control;
- }
-
- /* connect block node to read old parity nodes */
- for (i = 0; i < numParityNodes; i++) {
- blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
- RF_ASSERT(readParityNodes[i].numAntecedents == 1);
- readParityNodes[i].antecedents[0] = blockNode;
- readParityNodes[i].antType[0] = rf_control;
- }
-
- /* connect read old data nodes to write new data nodes */
- for (i = 0; i < numDataNodes; i++) {
- RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes);
- for (j = 0; j < numDataNodes; j++) {
- RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes);
- readDataNodes[i].succedents[j] = &writeDataNodes[j];
- writeDataNodes[j].antecedents[i] = &readDataNodes[i];
- if (i == j)
- writeDataNodes[j].antType[i] = rf_antiData;
- else
- writeDataNodes[j].antType[i] = rf_control;
- }
- }
-
- /* connect read old data nodes to xor nodes */
- for (i = 0; i < numDataNodes; i++)
- for (j = 0; j < numParityNodes; j++) {
- RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes);
- readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
- xorNodes[j].antecedents[i] = &readDataNodes[i];
- xorNodes[j].antType[i] = rf_trueData;
- }
-
- /* connect read old parity nodes to write new data nodes */
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes);
- for (j = 0; j < numDataNodes; j++) {
- readParityNodes[i].succedents[j] = &writeDataNodes[j];
- writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
- writeDataNodes[j].antType[numDataNodes + i] = rf_control;
- }
- }
-
- /* connect read old parity nodes to xor nodes */
- for (i = 0; i < numParityNodes; i++)
- for (j = 0; j < numParityNodes; j++) {
- readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
- xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
- xorNodes[j].antType[numDataNodes + i] = rf_trueData;
- }
-
- /* connect xor nodes to write new parity nodes */
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(xorNodes[i].numSuccedents == 1);
- RF_ASSERT(lpuNodes[i].numAntecedents == 1);
- xorNodes[i].succedents[0] = &lpuNodes[i];
- lpuNodes[i].antecedents[0] = &xorNodes[i];
- lpuNodes[i].antType[0] = rf_trueData;
- }
-
- for (i = 0; i < numDataNodes; i++) {
- if (lu_flag) {
- /* connect write new data nodes to unlock nodes */
- RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
- RF_ASSERT(unlockDataNodes[i].numAntecedents == 1);
- writeDataNodes[i].succedents[0] = &unlockDataNodes[i];
- unlockDataNodes[i].antecedents[0] = &writeDataNodes[i];
- unlockDataNodes[i].antType[0] = rf_control;
-
- /* connect unlock nodes to unblock node */
- RF_ASSERT(unlockDataNodes[i].numSuccedents == 1);
- RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
- unlockDataNodes[i].succedents[0] = unblockNode;
- unblockNode->antecedents[i] = &unlockDataNodes[i];
- unblockNode->antType[i] = rf_control;
- } else {
- /* connect write new data nodes to unblock node */
- RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
- RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
- writeDataNodes[i].succedents[0] = unblockNode;
- unblockNode->antecedents[i] = &writeDataNodes[i];
- unblockNode->antType[i] = rf_control;
- }
- }
-
- /* connect write new parity nodes to unblock node */
- for (i = 0; i < numParityNodes; i++) {
- RF_ASSERT(lpuNodes[i].numSuccedents == 1);
- lpuNodes[i].succedents[0] = unblockNode;
- unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i];
- unblockNode->antType[numDataNodes + i] = rf_control;
- }
-
- /* connect unblock node to terminator */
- RF_ASSERT(unblockNode->numSuccedents == 1);
- RF_ASSERT(termNode->numAntecedents == 1);
- RF_ASSERT(termNode->numSuccedents == 0);
- unblockNode->succedents[0] = termNode;
- termNode->antecedents[0] = unblockNode;
- termNode->antType[0] = rf_control;
-}
-
-
-/*
- * Entry point for building a parity-logging small-write DAG.
- *
- * Records the creator name in dag_h and delegates all of the real work to
- * rf_CommonCreateParityLoggingSmallWriteDAG().
- *
- * NOTE: the pfuncs and qfuncs arguments are ignored here; the common
- * routine is always called with &rf_xorFuncs and a NULL qfuncs.
- */
-void
-rf_CreateParityLoggingSmallWriteDAG(
- RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap,
- RF_DagHeader_t * dag_h,
- void *bp,
- RF_RaidAccessFlags_t flags,
- RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs,
- RF_RedFuncs_t * qfuncs)
-{
- dag_h->creator = "ParityLoggingSmallWriteDAG";
- rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL);
-}
-
-
-/*
- * Entry point for building a parity-logging large-write DAG.
- *
- * Records the creator name in dag_h and delegates all of the real work to
- * rf_CommonCreateParityLoggingLargeWriteDAG().
- *
- * NOTE: the nfaults and redFunc arguments are ignored here; the common
- * routine is always called with nfaults == 1 and rf_RegularXorFunc.
- */
-void
-rf_CreateParityLoggingLargeWriteDAG(
-	RF_Raid_t * raidPtr,
-	RF_AccessStripeMap_t * asmap,
-	RF_DagHeader_t * dag_h,
-	void *bp,
-	RF_RaidAccessFlags_t flags,
-	RF_AllocListElem_t * allocList,
-	int nfaults,
-	int (*redFunc) (RF_DagNode_t *))
-{
-	/* label the DAG as a *large* write (was mislabelled "Small") */
-	dag_h->creator = "ParityLoggingLargeWriteDAG";
-	rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc);
-}
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
diff --git a/sys/dev/raidframe/rf_parityloggingdags.h b/sys/dev/raidframe/rf_parityloggingdags.h
deleted file mode 100644
index dc0fc9b..0000000
--- a/sys/dev/raidframe/rf_parityloggingdags.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_parityloggingdags.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/****************************************************************************
- * *
- * rf_parityloggingdags.h -- header file for parity logging dags *
- * *
- ****************************************************************************/
-
-#ifndef _RF__RF_PARITYLOGGINGDAGS_H_
-#define _RF__RF_PARITYLOGGINGDAGS_H_
-
-/* routines that create DAGs */
-/*
- * Common builder for parity-logging large-write DAGs. The caller supplies
- * nfaults and the redundancy function redFunc.
- */
-void
-rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h,
- void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- int nfaults, int (*redFunc) (RF_DagNode_t *));
-/*
- * Common builder for parity-logging small-write DAGs. pfuncs supplies the
- * simple/regular functions and names used for the xor nodes; qfuncs may be
- * NULL.
- */
- void rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h,
- void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs);
-
-/*
- * Wrapper that sets dag_h->creator and calls the common large-write
- * builder with nfaults == 1 and rf_RegularXorFunc (its own nfaults and
- * redFunc arguments are not forwarded).
- */
- void rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h,
- void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- int nfaults, int (*redFunc) (RF_DagNode_t *));
-/*
- * Wrapper that sets dag_h->creator and calls the common small-write
- * builder with &rf_xorFuncs and a NULL qfuncs (its own pfuncs and qfuncs
- * arguments are not forwarded).
- */
- void rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr,
- RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h,
- void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList,
- RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs);
-
-#endif /* !_RF__RF_PARITYLOGGINGDAGS_H_ */
diff --git a/sys/dev/raidframe/rf_parityscan.c b/sys/dev/raidframe/rf_parityscan.c
deleted file mode 100644
index 34834cb..0000000
--- a/sys/dev/raidframe/rf_parityscan.c
+++ /dev/null
@@ -1,445 +0,0 @@
-/* $NetBSD: rf_parityscan.c,v 1.9 2000/05/28 03:00:31 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * rf_parityscan.c -- misc utilities related to parity verification
- *
- *****************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_kintf.h>
-
-/*****************************************************************************************
- *
- * walk through the entire array and rewrite any parity that is wrong.
- *
- * The array is scanned one stripe at a time: each stripe is mapped with
- * rf_MapAccess() and handed to rf_VerifyParity() with correct_it set, so
- * the layout's verify routine is asked to repair whatever it finds bad.
- * raidPtr->parity_rewrite_stripes_done is updated with the index of the
- * stripe being processed.  The scan stops at the first stripe whose result
- * is worse than RF_PARITY_CORRECTED.
- *
- * Returns RF_PARITY_OKAY when the layout tolerates no faults (there is no
- * parity), RF_PARITY_COULD_NOT_VERIFY when raidPtr->status[0] is not
- * optimal, 1 when the scan is aborted by a pending shutdown or a stripe
- * could not be verified/corrected, and 0 otherwise.
- *
- * there should never be more than one thread running this.
- *
- ****************************************************************************************/
-
-int
-rf_RewriteParity(raidPtr)
-	RF_Raid_t *raidPtr;
-{
-	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
-	RF_AccessStripeMapHeader_t *asm_h;
-	int     ret_val;
-	int     rc;
-	RF_SectorNum_t i;
-
-	if (raidPtr->Layout.map->faultsTolerated == 0) {
-		/* There isn't any parity. Call it "okay." */
-		return (RF_PARITY_OKAY);
-	}
-	if (raidPtr->status[0] != rf_rs_optimal) {
-		/*
-		 * We're in degraded mode.  Don't try to verify parity now!
-		 * XXX: this should be a "we don't want to", not a
-		 * "we can't" error.
-		 */
-		return (RF_PARITY_COULD_NOT_VERIFY);
-	}
-
-	ret_val = 0;
-	rc = RF_PARITY_OKAY;
-
-	/* one iteration per stripe; give up once a stripe cannot be fixed */
-	for (i = 0; i < raidPtr->totalSectors &&
-		     rc <= RF_PARITY_CORRECTED;
-	     i += layoutPtr->dataSectorsPerStripe) {
-		if (raidPtr->waitShutdown) {
-			/* Someone is pulling the plug on this set...
-			   abort the re-write */
-			return (1);
-		}
-		asm_h = rf_MapAccess(raidPtr, i,
-				     layoutPtr->dataSectorsPerStripe,
-				     NULL, RF_DONT_REMAP);
-		raidPtr->parity_rewrite_stripes_done =
-			i / layoutPtr->dataSectorsPerStripe;
-		rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0);
-
-		switch (rc) {
-		case RF_PARITY_OKAY:
-		case RF_PARITY_CORRECTED:
-			break;
-		case RF_PARITY_BAD:
-			printf("Parity bad during correction\n");
-			ret_val = 1;
-			break;
-		case RF_PARITY_COULD_NOT_CORRECT:
-			printf("Could not correct bad parity\n");
-			ret_val = 1;
-			break;
-		case RF_PARITY_COULD_NOT_VERIFY:
-			printf("Could not verify parity\n");
-			ret_val = 1;
-			break;
-		default:
-			printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc);
-			ret_val = 1;
-		}
-		/* the stripe map is per-iteration; release it before moving on */
-		rf_FreeAccessStripeMap(asm_h);
-	}
-	return (ret_val);
-}
-/*****************************************************************************************
- *
- * verify the parity of every parity region named by an access stripe map.
- *
- * For each element of the aasm list, and for each PDA on that element's
- * parityInfo list, the layout's VerifyParity hook is invoked (optionally
- * correcting, per correct_it).  The numerically largest RF_PARITY_* code
- * seen is returned; see rf_parityscan.h for why that ordering works.
- *
- * Returns RF_PARITY_OKAY if the layout tolerates no faults, and
- * RF_PARITY_COULD_NOT_VERIFY if the layout provides no VerifyParity hook.
- *
- ****************************************************************************************/
-int
-rf_VerifyParity(raidPtr, aasm, correct_it, flags)
-	RF_Raid_t *raidPtr;
-	RF_AccessStripeMap_t *aasm;
-	int     correct_it;
-	RF_RaidAccessFlags_t flags;
-{
-	RF_LayoutSW_t *layout = raidPtr->Layout.map;
-	RF_AccessStripeMap_t *curAsm;
-	RF_PhysDiskAddr_t *ppda;
-	int     status, worst;
-
-	/* no redundancy means there is no parity to get wrong */
-	if (layout->faultsTolerated == 0)
-		return (RF_PARITY_OKAY);
-
-	/* without a layout-specific verifier nothing can be checked */
-	if (!layout->VerifyParity)
-		return (RF_PARITY_COULD_NOT_VERIFY);
-
-	worst = RF_PARITY_OKAY;
-	curAsm = aasm;
-	while (curAsm) {
-		ppda = curAsm->parityInfo;
-		while (ppda) {
-			status = layout->VerifyParity(raidPtr,
-			    curAsm->raidAddress, ppda, correct_it, flags);
-			/* keep the most severe result seen so far */
-			if (status > worst)
-				worst = status;
-			ppda = ppda->next;
-		}
-		curAsm = curAsm->next;
-	}
-	return (worst);
-}
-
-/*
- * Verify, and optionally repair, the parity for the region of one stripe
- * described by parityPDA.
- *
- * The data and parity units of the stripe containing raidAddr are read
- * through a simple read DAG, each data unit is passed with a zeroed buffer
- * to rf_bxor() (presumably accumulating their XOR -- confirm against
- * rf_bxor), and the result is compared byte by byte with the parity that
- * was read.  If they differ and correct_it is set, the computed buffer is
- * written to the parity location through a one-node write DAG.
- *
- * Returns:
- *   RF_PARITY_OKAY              parity matches, or a needed disk is dead
- *                               (nothing can be checked in that case)
- *   RF_PARITY_BAD               mismatch found and correct_it == 0
- *   RF_PARITY_CORRECTED         mismatch found and rewritten
- *   RF_PARITY_COULD_NOT_CORRECT mismatch found but the write DAG failed
- *   RF_PARITY_COULD_NOT_VERIFY  the read DAG failed
- */
-int
-rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags)
- RF_Raid_t *raidPtr;
- RF_RaidAddr_t raidAddr;
- RF_PhysDiskAddr_t *parityPDA;
- int correct_it;
- RF_RaidAccessFlags_t flags;
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
- raidAddr);
- RF_SectorCount_t numsector = parityPDA->numSector;
- int numbytes = rf_RaidAddressToByte(raidPtr, numsector);
- int bytesPerStripe = numbytes * layoutPtr->numDataCol;
- RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */
- RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock;
- RF_AccessStripeMapHeader_t *asm_h;
- RF_AccessStripeMap_t *asmap;
- RF_AllocListElem_t *alloclist;
- RF_PhysDiskAddr_t *pda;
- char *pbuf, *buf, *end_p, *p;
- int i, retcode;
- RF_ReconUnitNum_t which_ru;
- RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr,
- raidAddr,
- &which_ru);
- int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
- RF_AccTraceEntry_t tracerec;
- RF_MCPair_t *mcpair;
-
- retcode = RF_PARITY_OKAY;
-
- /* buf holds one numbytes-sized slot per data and parity column; pbuf
-  * receives the recomputed parity */
- mcpair = rf_AllocMCPair();
- rf_MakeAllocList(alloclist);
- RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist);
- RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make
- * sure buffer is zeroed */
- /* end of the data slots; the parity read lands at buf + bytesPerStripe */
- end_p = buf + bytesPerStripe;
-
- rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc,
- "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY);
- blockNode = rd_dag_h->succedents[0];
- unblockNode = blockNode->succedents[0]->succedents[0];
-
- /* map the stripe and fill in the PDAs in the dag */
- asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
- asmap = asm_h->stripeMap;
-
- /* one read node per data column, restricted to the range of parityPDA */
- for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
- RF_ASSERT(pda);
- rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
- RF_ASSERT(pda->numSector != 0);
- if (rf_TryToRedirectPDA(raidPtr, pda, 0))
- goto out; /* no way to verify parity if disk is
- * dead. return w/ good status */
- blockNode->succedents[i]->params[0].p = pda;
- blockNode->succedents[i]->params[2].v = psID;
- blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
-
- /* the last read node fetches the parity unit; exactly one parity PDA
-  * is expected here */
- RF_ASSERT(!asmap->parityInfo->next);
- rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1);
- RF_ASSERT(asmap->parityInfo->numSector != 0);
- if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1))
- goto out;
- blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo;
-
- /* fire off the DAG */
- bzero((char *) &tracerec, sizeof(tracerec));
- rd_dag_h->tracerec = &tracerec;
-
- if (rf_verifyParityDebug) {
- printf("Parity verify read dag:\n");
- rf_PrintDAGList(rd_dag_h);
- }
- /* dispatch the read DAG and sleep until its wakeup sets mcpair->flag */
- RF_LOCK_MUTEX(mcpair->mutex);
- mcpair->flag = 0;
- rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) mcpair);
- while (!mcpair->flag)
- RF_WAIT_COND(mcpair->cond, mcpair->mutex);
- RF_UNLOCK_MUTEX(mcpair->mutex);
- if (rd_dag_h->status != rf_enable) {
- RF_ERRORMSG("Unable to verify parity: can't read the stripe\n");
- retcode = RF_PARITY_COULD_NOT_VERIFY;
- goto out;
- }
- /* fold every data slot into pbuf */
- for (p = buf; p < end_p; p += numbytes) {
- rf_bxor(p, pbuf, numbytes, NULL);
- }
- /* compare recomputed parity with the parity read from disk; stop at
-  * the first mismatching byte */
- for (i = 0; i < numbytes; i++) {
-#if 0
- if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) {
- printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + i]);
- }
-#endif
- if (pbuf[i] != buf[bytesPerStripe + i]) {
- if (!correct_it)
- RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n",
- i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]);
- retcode = RF_PARITY_BAD;
- break;
- }
- }
-
- /* mismatch and the caller asked for repair: write pbuf over the
-  * on-disk parity */
- if (retcode && correct_it) {
- wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
- "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY);
- wrBlock = wr_dag_h->succedents[0];
- wrUnblock = wrBlock->succedents[0]->succedents[0];
- wrBlock->succedents[0]->params[0].p = asmap->parityInfo;
- wrBlock->succedents[0]->params[2].v = psID;
- wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- bzero((char *) &tracerec, sizeof(tracerec));
- wr_dag_h->tracerec = &tracerec;
- if (rf_verifyParityDebug) {
- printf("Parity verify write dag:\n");
- rf_PrintDAGList(wr_dag_h);
- }
- RF_LOCK_MUTEX(mcpair->mutex);
- mcpair->flag = 0;
- rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) mcpair);
- while (!mcpair->flag)
- RF_WAIT_COND(mcpair->cond, mcpair->mutex);
- RF_UNLOCK_MUTEX(mcpair->mutex);
- if (wr_dag_h->status != rf_enable) {
- RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n");
- retcode = RF_PARITY_COULD_NOT_CORRECT;
- }
- rf_FreeDAG(wr_dag_h);
- /* still BAD means the write succeeded, so report it as fixed */
- if (retcode == RF_PARITY_BAD)
- retcode = RF_PARITY_CORRECTED;
- }
-out:
- /* common cleanup for every exit path after the allocations above */
- rf_FreeAccessStripeMap(asm_h);
- rf_FreeAllocList(alloclist);
- rf_FreeDAG(rd_dag_h);
- rf_FreeMCPair(mcpair);
- return (retcode);
-}
-
-/*
- * Retarget pda if the disk it names is being reconstructed and
- * rf_CheckRUReconstructed() reports the unit at pda->startSector as done.
- * With distributed sparing the layout's MapParity (parity != 0) or
- * MapSector (parity == 0) hook is called with RF_REMAP to rewrite
- * row/col/startSector; otherwise row/col are replaced by the disk's
- * spareRow/spareCol.
- *
- * Returns 1 if the disk that pda ends up naming is dead, 0 otherwise.
- */
-int
-rf_TryToRedirectPDA(raidPtr, pda, parity)
-	RF_Raid_t *raidPtr;
-	RF_PhysDiskAddr_t *pda;
-	int     parity;
-{
-	if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing &&
-	    rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) {
-		if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
-			/* dedicated spare: read both coordinates before
-			 * overwriting either one */
-			RF_RowCol_t newRow = raidPtr->Disks[pda->row][pda->col].spareRow;
-			RF_RowCol_t newCol = raidPtr->Disks[pda->row][pda->col].spareCol;
-
-			pda->row = newRow;
-			pda->col = newCol;
-		} else {
-			/* distributed spare: remember the old address for
-			 * the debug trace, then let the layout remap it */
-			RF_RowCol_t oldRow = pda->row, oldCol = pda->col;
-			RF_SectorNum_t oldSect = pda->startSector;
-
-			if (!parity) {
-				(raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
-				if (rf_verifyParityDebug)
-					printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n",
-					    oldRow, oldCol, (long) oldSect, pda->row, pda->col, (long) pda->startSector);
-			} else {
-				(raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
-				if (rf_verifyParityDebug)
-					printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n",
-					    oldRow, oldCol, (long) oldSect, pda->row, pda->col, (long) pda->startSector);
-			}
-		}
-	}
-	return (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status) ? 1 : 0);
-}
-/*****************************************************************************************
- *
- * currently a stub.
- *
- * takes as input an ASM describing a write operation and containing one failure, and
- * verifies that the parity was correctly updated to reflect the write.
- *
- * if it's a data unit that's failed, we read the other data units in the stripe and
- * the parity unit, XOR them together, and verify that we get the data intended for
- * the failed disk. Since it's easy, we also validate that the right data got written
- * to the surviving data disks.
- *
- * If it's the parity that failed, there's really no validation we can do except the
- * above verification that the right data got written to all disks. This is because
- * the new data intended for the failed disk is supplied in the ASM, but this is of
- * course not the case for the new parity.
- *
- ****************************************************************************************/
-int
-rf_VerifyDegrModeWrite(raidPtr, asmh)
- RF_Raid_t *raidPtr;
- RF_AccessStripeMapHeader_t *asmh;
-{
- /* stub: neither argument is examined and no verification is done;
-  * the function always returns 0 */
- return (0);
-}
-/*
- * Build a four-level DAG:
- *
- *     header -> block -> { nNodes worker nodes } -> unblock -> terminator
- *
- * Every worker node runs doFunc/undoFunc, is labelled with name, and gets
- * params[1] pointed at its own bytesPerSU-sized slice of databuf.  The
- * remaining worker params (pda etc.) are left for the caller to fill in.
- *
- * The unblock node is the single commit node, so any failure before it is
- * reached causes the whole dag to fail.
- *
- * The flags and priority arguments are accepted but not used here.
- */
-RF_DagHeader_t *
-rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority)
-	RF_Raid_t *raidPtr;
-	int     nNodes;
-	int     bytesPerSU;
-	char   *databuf;
-	int     (*doFunc) (RF_DagNode_t * node);
-	int     (*undoFunc) (RF_DagNode_t * node);
-	char   *name;		/* node names at the second level */
-	RF_AllocListElem_t *alloclist;
-	RF_RaidAccessFlags_t flags;
-	int     priority;
-{
-	RF_DagHeader_t *hdr;
-	RF_DagNode_t *pool, *blk, *unblk, *term, *cur;
-	int     n;
-
-	/* a single allocation holds the workers followed by the block,
-	 * unblock and terminator nodes */
-	RF_CallocAndAdd(pool, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist);
-	blk = pool + nNodes;
-	unblk = blk + 1;
-	term = unblk + 1;
-
-	hdr = rf_AllocDAGHeader();
-	hdr->raidPtr = (void *) raidPtr;
-	hdr->allocList = NULL;	/* the header does not own an alloc list */
-	hdr->status = rf_enable;
-	hdr->numSuccedents = 1;
-	hdr->creator = "SimpleDAG";
-
-	/* exactly one commit node (the unblock node); none committed yet */
-	hdr->numCommitNodes = 1;
-	hdr->numCommits = 0;
-
-	hdr->succedents[0] = blk;
-	rf_InitNode(blk, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, hdr, "Nil", alloclist);
-	rf_InitNode(unblk, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, hdr, "Nil", alloclist);
-	unblk->succedents[0] = term;
-
-	/* wire each worker between the block and unblock nodes */
-	for (n = 0; n < nNodes; n++) {
-		cur = &pool[n];
-		unblk->antecedents[n] = cur;
-		blk->succedents[n] = cur;
-		unblk->antType[n] = rf_control;
-		rf_InitNode(cur, rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, hdr, name, alloclist);
-		cur->succedents[0] = unblk;
-		cur->antecedents[0] = blk;
-		cur->antType[0] = rf_control;
-		cur->params[1].p = (databuf + (n * bytesPerSU));
-	}
-
-	rf_InitNode(term, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, hdr, "Trm", alloclist);
-	term->antecedents[0] = unblk;
-	term->antType[0] = rf_control;
-	return (hdr);
-}
diff --git a/sys/dev/raidframe/rf_parityscan.h b/sys/dev/raidframe/rf_parityscan.h
deleted file mode 100644
index babca41..0000000
--- a/sys/dev/raidframe/rf_parityscan.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_parityscan.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_PARITYSCAN_H_
-#define _RF__RF_PARITYSCAN_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_alloclist.h>
-
-/* scan the whole array stripe by stripe, correcting parity as it goes */
-int rf_RewriteParity(RF_Raid_t * raidPtr);
-/* verify (and, if correct_it, rewrite) the parity region described by
- * parityPDA; returns one of the RF_PARITY_* codes below */
-int
-rf_VerifyParityBasic(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr,
- RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
-/* run the layout's VerifyParity hook over every parity PDA in stripeMap
- * and return the largest RF_PARITY_* code seen */
-int
-rf_VerifyParity(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * stripeMap,
- int correct_it, RF_RaidAccessFlags_t flags);
-/* redirect pda to spare space if its unit has been reconstructed;
- * returns 1 if the resulting disk is dead, else 0 */
-int rf_TryToRedirectPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, int parity);
-/* currently a stub that always returns 0 */
-int rf_VerifyDegrModeWrite(RF_Raid_t * raidPtr, RF_AccessStripeMapHeader_t * asmh);
-/* build a header/block/nNodes-workers/unblock/terminator DAG whose worker
- * nodes run doFunc/undoFunc on consecutive bytesPerSU slices of databuf */
-RF_DagHeader_t *
-rf_MakeSimpleDAG(RF_Raid_t * raidPtr, int nNodes,
- int bytesPerSU, char *databuf,
- int (*doFunc) (RF_DagNode_t *),
- int (*undoFunc) (RF_DagNode_t *),
- char *name, RF_AllocListElem_t * alloclist,
- RF_RaidAccessFlags_t flags, int priority);
-
-#define RF_DO_CORRECT_PARITY 1
-#define RF_DONT_CORRECT_PARITY 0
-
-/*
- * Return vals for VerifyParity operation
- *
- * Ordering is important here.
- */
-#define RF_PARITY_OKAY 0 /* or no parity information */
-#define RF_PARITY_CORRECTED 1
-#define RF_PARITY_BAD 2
-#define RF_PARITY_COULD_NOT_CORRECT 3
-#define RF_PARITY_COULD_NOT_VERIFY 4
-
-#endif /* !_RF__RF_PARITYSCAN_H_ */
diff --git a/sys/dev/raidframe/rf_pq.c b/sys/dev/raidframe/rf_pq.c
deleted file mode 100644
index 98b53e8..0000000
--- a/sys/dev/raidframe/rf_pq.c
+++ /dev/null
@@ -1,928 +0,0 @@
-/* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Code for RAID level 6 (P + Q) disk array architecture.
- */
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_pqdeg.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_pq.h>
-
-RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
-RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
-
-int
-rf_RegularONPFunc(node)
- RF_DagNode_t *node;
-{
- return (rf_RegularXorFunc(node));
-}
-/*
- same as simpleONQ func, but the coefficient is always 1
-*/
-
-int
-rf_SimpleONPFunc(node)
- RF_DagNode_t *node;
-{
- return (rf_SimpleXorFunc(node));
-}
-
-int
-rf_RecoveryPFunc(node)
- RF_DagNode_t *node;
-{
- return (rf_RecoveryXorFunc(node));
-}
-
-int
-rf_RegularPFunc(node)
- RF_DagNode_t *node;
-{
- return (rf_RegularXorFunc(node));
-}
-#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
-
-static void
-QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
- unsigned char coeff);
-static void
-rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
- unsigned length, unsigned coeff);
-
-RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
-RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
-RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
-
-void
-rf_PQDagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr * createFunc)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- unsigned ndfail = asmap->numDataFailed;
- unsigned npfail = asmap->numParityFailed;
- unsigned ntfail = npfail + ndfail;
-
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
- if (ntfail > 2) {
- RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
- /* *infoFunc = */ *createFunc = NULL;
- return;
- }
- /* ok, we can do this I/O */
- if (type == RF_IO_TYPE_READ) {
- switch (ndfail) {
- case 0:
- /* fault free read */
- *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
- break;
- case 1:
- /* lost a single data unit */
- /* two cases: (1) parity is not lost. do a normal raid
- * 5 reconstruct read. (2) parity is lost. do a
- * reconstruct read using "q". */
- if (ntfail == 2) { /* also lost redundancy */
- if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
- *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
- } else {
- /* P and Q are ok. But is there a failure in
- * some unaccessed data unit? */
- if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
- *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
- }
- break;
- case 2:
- /* lost two data units */
- /* *infoFunc = PQOneTwo; */
- *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
- break;
- }
- return;
- }
- /* a write */
- switch (ntfail) {
- case 0: /* fault free */
- if (rf_suppressLocksAndLargeWrites ||
- (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
- (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
-
- *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
- } else {
- *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
- }
- break;
-
- case 1: /* single disk fault */
- if (npfail == 1) {
- RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
- if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
- * normal mode raid5
- * write. */
- if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
- || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
- *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
- } else {/* parity died, small write only updating Q */
- if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
- || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
- *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
- }
- } else { /* data missing. Do a P reconstruct write if
- * only a single data unit is lost in the
- * stripe, otherwise a PQ reconstruct write. */
- if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
- *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
- }
- break;
-
- case 2: /* two disk faults */
- switch (npfail) {
- case 2: /* both p and q dead */
- *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
- break;
- case 1: /* either p or q and dead data */
- RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
- RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
- if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
- *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
- break;
- case 0: /* double data loss */
- *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
- break;
- }
- break;
-
- default: /* more than 2 disk faults */
- *createFunc = NULL;
- RF_PANIC();
- }
- return;
-}
-/*
- Used as a stop gap info function
-*/
-#if 0
-static void
-PQOne(raidPtr, nSucc, nAnte, asmap)
- RF_Raid_t *raidPtr;
- int *nSucc;
- int *nAnte;
- RF_AccessStripeMap_t *asmap;
-{
- *nSucc = *nAnte = 1;
-}
-
-static void
-PQOneTwo(raidPtr, nSucc, nAnte, asmap)
- RF_Raid_t *raidPtr;
- int *nSucc;
- int *nAnte;
- RF_AccessStripeMap_t *asmap;
-{
- *nSucc = 1;
- *nAnte = 2;
-}
-#endif
-
-RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
-{
- rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
- rf_RegularPQFunc, RF_FALSE);
-}
-
-int
-rf_RegularONQFunc(node)
- RF_DagNode_t *node;
-{
- int np = node->numParams;
- int d;
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
- int i;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
- char *qbuf, *qpbuf;
- char *obuf, *nbuf;
- RF_PhysDiskAddr_t *old, *new;
- unsigned long coeff;
- unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
-
- RF_ETIMER_START(timer);
-
- d = (np - 3) / 4;
- RF_ASSERT(4 * d + 3 == np);
- qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
- for (i = 0; i < d; i++) {
- old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
- obuf = (char *) node->params[2 * i + 1].p;
- new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
- nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
- RF_ASSERT(new->numSector == old->numSector);
- RF_ASSERT(new->raidAddress == old->raidAddress);
- /* the stripe unit within the stripe tells us the coefficient
- * to use for the multiply. */
- coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
- /* compute the data unit offset within the column, then add
- * one */
- coeff = (coeff % raidPtr->Layout.numDataCol);
- qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
- QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
- }
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
- rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
- * I/O in this node */
- return (0);
-}
-/*
- See the SimpleXORFunc for the difference between a simple and regular func.
- These Q functions should be used for
-
- new q = Q(data,old data,old q)
-
- style updates and not for
-
- q = ( new data, new data, .... )
-
- computations.
-
- The simple q takes 2(2d+1)+1 params, where d is the number
- of stripes written. The order of params is
- old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
- [2d] old q pda_0, old q buffer
- [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d
- raidPtr
-*/
-
-int
-rf_SimpleONQFunc(node)
- RF_DagNode_t *node;
-{
- int np = node->numParams;
- int d;
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
- int i;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
- char *qbuf;
- char *obuf, *nbuf;
- RF_PhysDiskAddr_t *old, *new;
- unsigned long coeff;
-
- RF_ETIMER_START(timer);
-
- d = (np - 3) / 4;
- RF_ASSERT(4 * d + 3 == np);
- qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
- for (i = 0; i < d; i++) {
- old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
- obuf = (char *) node->params[2 * i + 1].p;
- new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
- nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
- RF_ASSERT(new->numSector == old->numSector);
- RF_ASSERT(new->raidAddress == old->raidAddress);
- /* the stripe unit within the stripe tells us the coefficient
- * to use for the multiply. */
- coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
- /* compute the data unit offset within the column, then add
- * one */
- coeff = (coeff % raidPtr->Layout.numDataCol);
- QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
- }
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
- rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
- * I/O in this node */
- return (0);
-}
-RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
-{
- rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
-}
-
-static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
-
-static void
-RegularQSubr(node, qbuf)
- RF_DagNode_t *node;
- char *qbuf;
-{
- int np = node->numParams;
- int d;
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
- unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
- int i;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
- char *obuf, *qpbuf;
- RF_PhysDiskAddr_t *old;
- unsigned long coeff;
-
- RF_ETIMER_START(timer);
-
- d = (np - 1) / 2;
- RF_ASSERT(2 * d + 1 == np);
- for (i = 0; i < d; i++) {
- old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
- obuf = (char *) node->params[2 * i + 1].p;
- coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
- /* compute the data unit offset within the column, then add
- * one */
- coeff = (coeff % raidPtr->Layout.numDataCol);
- /* the input buffers may not all be aligned with the start of
- * the stripe. so shift by their sector offset within the
- * stripe unit */
- qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
- rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
- }
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
-}
-/*
- used in degraded writes.
-*/
-
-static void DegrQSubr(RF_DagNode_t *node);
-
-static void
-DegrQSubr(node)
- RF_DagNode_t *node;
-{
- int np = node->numParams;
- int d;
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
- unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
- int i;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
- char *qbuf = node->results[1];
- char *obuf, *qpbuf;
- RF_PhysDiskAddr_t *old;
- unsigned long coeff;
- unsigned fail_start;
- int j;
-
- old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
- fail_start = old->startSector % secPerSU;
-
- RF_ETIMER_START(timer);
-
- d = (np - 2) / 2;
- RF_ASSERT(2 * d + 2 == np);
- for (i = 0; i < d; i++) {
- old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
- obuf = (char *) node->params[2 * i + 1].p;
- coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
- /* compute the data unit offset within the column, then add
- * one */
- coeff = (coeff % raidPtr->Layout.numDataCol);
- /* the input buffers may not all be aligned with the start of
- * the stripe. so shift by their sector offset within the
- * stripe unit */
- j = old->startSector % secPerSU;
- RF_ASSERT(j >= fail_start);
- qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
- rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
- }
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
-}
-/*
- Called by large write code to compute the new parity and the new q.
-
- structure of the params:
-
- pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
- raidPtr
-
- for a total of 2d+1 arguments.
- The result buffers results[0], results[1] are the buffers for the p and q,
- respectively.
-
- We compute Q first, then compute P. The P calculation may try to reuse
- one of the input buffers for its output, so if we computed P first, we would
- corrupt the input for the q calculation.
-*/
-
-int
-rf_RegularPQFunc(node)
- RF_DagNode_t *node;
-{
- RegularQSubr(node, node->results[1]);
- return (rf_RegularXorFunc(node)); /* does the wakeup */
-}
-
-int
-rf_RegularQFunc(node)
- RF_DagNode_t *node;
-{
- /* Almost ... adjust Qsubr args */
- RegularQSubr(node, node->results[0]);
- rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
- * I/O in this node */
- return (0);
-}
-/*
- Called by singly degraded write code to compute the new parity and the new q.
-
- structure of the params:
-
- pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
- failedPDA raidPtr
-
- for a total of 2d+2 arguments.
- The result buffers results[0], results[1] are the buffers for the parity and q,
- respectively.
-
- We compute Q first, then compute parity. The parity calculation may try to reuse
- one of the input buffers for its output, so if we computed parity first, we would
- corrupt the input for the q calculation.
-
- We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
-*/
-
-void
-rf_Degraded_100_PQFunc(node)
- RF_DagNode_t *node;
-{
- int np = node->numParams;
-
- RF_ASSERT(np >= 2);
- DegrQSubr(node);
- rf_RecoveryXorFunc(node);
-}
-
-
-/*
- The two below are used when reading a stripe with a single lost data unit.
- The parameters are
-
- pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
-
- and results[0] contains the data buffer. Which is originally zero-filled.
-
-*/
-
-/* this Q func is used by the degraded-mode dag functions to recover lost data.
- * the second-to-last parameter is the PDA for the failed portion of the access.
- * the code here looks at this PDA and assumes that the xor target buffer is
- * equal in size to the number of sectors in the failed PDA. It then uses
- * the other PDAs in the parameter list to determine where within the target
- * buffer the corresponding data should be xored.
- *
- * Recall the basic equation is
- *
- * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
- *
- * so to recover data_j we need
- *
- * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
- *
- * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
- * copying Q into it. Then we need to do a table lookup to convert to solve
- * data_j /= J
- *
- *
- */
-int
-rf_RecoveryQFunc(node)
- RF_DagNode_t *node;
-{
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
- RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
- RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
- int i;
- RF_PhysDiskAddr_t *pda;
- RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
- char *srcbuf, *destbuf;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
- unsigned long coeff;
-
- RF_ETIMER_START(timer);
- /* start by copying Q into the buffer */
- bcopy(node->params[node->numParams - 3].p, node->results[0],
- rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
- for (i = 0; i < node->numParams - 4; i += 2) {
- RF_ASSERT(node->params[i + 1].p != node->results[0]);
- pda = (RF_PhysDiskAddr_t *) node->params[i].p;
- srcbuf = (char *) node->params[i + 1].p;
- suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
- destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
- coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
- /* compute the data unit offset within the column */
- coeff = (coeff % raidPtr->Layout.numDataCol);
- rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
- }
- /* Do the nasty inversion now */
- coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
- rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
- rf_GenericWakeupFunc(node, 0);
- return (0);
-}
-
-int
-rf_RecoveryPQFunc(node)
- RF_DagNode_t *node;
-{
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
- printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
- return (1);
-}
-/*
- Degraded write Q subroutine.
- Used when P is dead.
- Large-write style Q computation.
- Parameters
-
- (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
-
- We ignore failedPDA.
-
- This is a "simple style" recovery func.
-*/
-
-void
-rf_PQ_DegradedWriteQFunc(node)
- RF_DagNode_t *node;
-{
- int np = node->numParams;
- int d;
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
- unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
- int i;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
- RF_Etimer_t timer;
- char *qbuf = node->results[0];
- char *obuf, *qpbuf;
- RF_PhysDiskAddr_t *old;
- unsigned long coeff;
- int fail_start, j;
-
- old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
- fail_start = old->startSector % secPerSU;
-
- RF_ETIMER_START(timer);
-
- d = (np - 2) / 2;
- RF_ASSERT(2 * d + 2 == np);
-
- for (i = 0; i < d; i++) {
- old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
- obuf = (char *) node->params[2 * i + 1].p;
- coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
- /* compute the data unit offset within the column, then add
- * one */
- coeff = (coeff % raidPtr->Layout.numDataCol);
- j = old->startSector % secPerSU;
- RF_ASSERT(j >= fail_start);
- qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
- rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
- }
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
- rf_GenericWakeupFunc(node, 0);
-}
-
-
-
-
-/* Q computations */
-
-/*
- coeff - colummn;
-
- compute dest ^= qfor[28-coeff][rn[coeff+1] a]
-
- on 5-bit basis;
- length in bytes;
-*/
-
-void
-rf_IncQ(dest, buf, length, coeff)
- unsigned long *dest;
- unsigned long *buf;
- unsigned length;
- unsigned coeff;
-{
- unsigned long a, d, new;
- unsigned long a1, a2;
- unsigned int *q = &(rf_qfor[28 - coeff][0]);
- unsigned r = rf_rn[coeff + 1];
-
-#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
-#define INSERT(a,i) (a << (5L*i))
-
- length /= 8;
- /* 13 5 bit quants in a 64 bit word */
- while (length) {
- a = *buf++;
- d = *dest;
- a1 = EXTRACT(a, 0) ^ r;
- a2 = EXTRACT(a, 1) ^ r;
- new = INSERT(a2, 1) | a1;
- a1 = EXTRACT(a, 2) ^ r;
- a2 = EXTRACT(a, 3) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 2) | INSERT(a2, 3);
- a1 = EXTRACT(a, 4) ^ r;
- a2 = EXTRACT(a, 5) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 4) | INSERT(a2, 5);
- a1 = EXTRACT(a, 5) ^ r;
- a2 = EXTRACT(a, 6) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 5) | INSERT(a2, 6);
-#if RF_LONGSHIFT > 2
- a1 = EXTRACT(a, 7) ^ r;
- a2 = EXTRACT(a, 8) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 7) | INSERT(a2, 8);
- a1 = EXTRACT(a, 9) ^ r;
- a2 = EXTRACT(a, 10) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 9) | INSERT(a2, 10);
- a1 = EXTRACT(a, 11) ^ r;
- a2 = EXTRACT(a, 12) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 11) | INSERT(a2, 12);
-#endif /* RF_LONGSHIFT > 2 */
- d ^= new;
- *dest++ = d;
- length--;
- }
-}
-/*
- compute
-
- dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
-
- on a five bit basis.
- optimization: compute old ^ new on 64 bit basis.
-
- length in bytes.
-*/
-
-static void
-QDelta(
- char *dest,
- char *obuf,
- char *nbuf,
- unsigned length,
- unsigned char coeff)
-{
- unsigned long a, d, new;
- unsigned long a1, a2;
- unsigned int *q = &(rf_qfor[28 - coeff][0]);
- unsigned int r = rf_rn[coeff + 1];
-
- r = a1 = a2 = new = d = a = 0; /* XXX for now... */
- q = NULL; /* XXX for now */
-
-#ifdef _KERNEL
- /* PQ in kernel currently not supported because the encoding/decoding
- * table is not present */
- bzero(dest, length);
-#else /* KERNEL */
- /* this code probably doesn't work and should be rewritten -wvcii */
- /* 13 5 bit quants in a 64 bit word */
- length /= 8;
- while (length) {
- a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
- a ^= *nbuf++;
- d = *dest;
- a1 = EXTRACT(a, 0) ^ r;
- a2 = EXTRACT(a, 1) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = INSERT(a2, 1) | a1;
- a1 = EXTRACT(a, 2) ^ r;
- a2 = EXTRACT(a, 3) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 2) | INSERT(a2, 3);
- a1 = EXTRACT(a, 4) ^ r;
- a2 = EXTRACT(a, 5) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 4) | INSERT(a2, 5);
- a1 = EXTRACT(a, 5) ^ r;
- a2 = EXTRACT(a, 6) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 5) | INSERT(a2, 6);
-#if RF_LONGSHIFT > 2
- a1 = EXTRACT(a, 7) ^ r;
- a2 = EXTRACT(a, 8) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 7) | INSERT(a2, 8);
- a1 = EXTRACT(a, 9) ^ r;
- a2 = EXTRACT(a, 10) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 9) | INSERT(a2, 10);
- a1 = EXTRACT(a, 11) ^ r;
- a2 = EXTRACT(a, 12) ^ r;
- a1 = q[a1];
- a2 = q[a2];
- new = new | INSERT(a1, 11) | INSERT(a2, 12);
-#endif /* RF_LONGSHIFT > 2 */
- d ^= new;
- *dest++ = d;
- length--;
- }
-#endif /* _KERNEL */
-}
-/*
- recover columns a and b from the given p and q into
- bufs abuf and bbuf. All bufs are word aligned.
- Length is in bytes.
-*/
-
-
-/*
- * XXX
- *
- * Everything about this seems wrong.
- */
-void
-rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
- unsigned long *pbuf;
- unsigned long *qbuf;
- unsigned long *abuf;
- unsigned long *bbuf;
- unsigned length;
- unsigned coeff_a;
- unsigned coeff_b;
-{
- unsigned long p, q, a, a0, a1;
- int col = (29 * coeff_a) + coeff_b;
- unsigned char *q0 = &(rf_qinv[col][0]);
-
- length /= 8;
- while (length) {
- p = *pbuf++;
- q = *qbuf++;
- a0 = EXTRACT(p, 0);
- a1 = EXTRACT(q, 0);
- a = q0[a0 << 5 | a1];
-#define MF(i) \
- a0 = EXTRACT(p,i); \
- a1 = EXTRACT(q,i); \
- a = a | INSERT(q0[a0<<5 | a1],i)
-
- MF(1);
- MF(2);
- MF(3);
- MF(4);
- MF(5);
- MF(6);
-#if 0
- MF(7);
- MF(8);
- MF(9);
- MF(10);
- MF(11);
- MF(12);
-#endif /* 0 */
- *abuf++ = a;
- *bbuf++ = a ^ p;
- length--;
- }
-}
-/*
- Lost parity and a data column. Recover that data column.
- Assume col coeff is lost. Let q the contents of Q after
- all surviving data columns have been q-xored out of it.
- Then we have the equation
-
- q[28-coeff][a_i ^ r_i+1] = q
-
- but q is cyclic with period 31.
- So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
- q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
-
- so a_i = r_{coeff+1} ^ q[3+coeff][q]
-
- The routine is passed q buffer and the buffer
- the data is to be recoverd into. They can be the same.
-*/
-
-
-
-static void
-rf_InvertQ(
- unsigned long *qbuf,
- unsigned long *abuf,
- unsigned length,
- unsigned coeff)
-{
- unsigned long a, new;
- unsigned long a1, a2;
- unsigned int *q = &(rf_qfor[3 + coeff][0]);
- unsigned r = rf_rn[coeff + 1];
-
- /* 13 5 bit quants in a 64 bit word */
- length /= 8;
- while (length) {
- a = *qbuf++;
- a1 = EXTRACT(a, 0);
- a2 = EXTRACT(a, 1);
- a1 = r ^ q[a1];
- a2 = r ^ q[a2];
- new = INSERT(a2, 1) | a1;
-#define M(i,j) \
- a1 = EXTRACT(a,i); \
- a2 = EXTRACT(a,j); \
- a1 = r ^ q[a1]; \
- a2 = r ^ q[a2]; \
- new = new | INSERT(a1,i) | INSERT(a2,j)
-
- M(2, 3);
- M(4, 5);
- M(5, 6);
-#if RF_LONGSHIFT > 2
- M(7, 8);
- M(9, 10);
- M(11, 12);
-#endif /* RF_LONGSHIFT > 2 */
- *abuf++ = new;
- length--;
- }
-}
-#endif /* (RF_INCLUDE_DECL_PQ > 0) ||
- * (RF_INCLUDE_RAID6 > 0) */
diff --git a/sys/dev/raidframe/rf_pq.h b/sys/dev/raidframe/rf_pq.h
deleted file mode 100644
index 9a2ce23..0000000
--- a/sys/dev/raidframe/rf_pq.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_pq.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */
-/*
- * rf_pq.h
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_PQ_H_
-#define _RF__RF_PQ_H_
-
-#include <dev/raidframe/rf_archs.h>
-
-extern RF_RedFuncs_t rf_pFuncs;
-extern RF_RedFuncs_t rf_pRecoveryFuncs;
-
-int rf_RegularONPFunc(RF_DagNode_t * node);
-int rf_SimpleONPFunc(RF_DagNode_t * node);
-int rf_RecoveryPFunc(RF_DagNode_t * node);
-int rf_RegularPFunc(RF_DagNode_t * node);
-
-#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
-
-extern RF_RedFuncs_t rf_qFuncs;
-extern RF_RedFuncs_t rf_qRecoveryFuncs;
-extern RF_RedFuncs_t rf_pqRecoveryFuncs;
-
-void
-rf_PQDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc);
-RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG);
-int rf_RegularONQFunc(RF_DagNode_t * node);
-int rf_SimpleONQFunc(RF_DagNode_t * node);
-RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG);
-int rf_RegularPQFunc(RF_DagNode_t * node);
-int rf_RegularQFunc(RF_DagNode_t * node);
-void rf_Degraded_100_PQFunc(RF_DagNode_t * node);
-int rf_RecoveryQFunc(RF_DagNode_t * node);
-int rf_RecoveryPQFunc(RF_DagNode_t * node);
-void rf_PQ_DegradedWriteQFunc(RF_DagNode_t * node);
-void
-rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length,
- unsigned coeff);
-void
-rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf,
- unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b);
-
-#endif /* (RF_INCLUDE_DECL_PQ > 0) ||
- * (RF_INCLUDE_RAID6 > 0) */
-
-#endif /* !_RF__RF_PQ_H_ */
diff --git a/sys/dev/raidframe/rf_pqdeg.c b/sys/dev/raidframe/rf_pqdeg.c
deleted file mode 100644
index 0d3356c..0000000
--- a/sys/dev/raidframe/rf_pqdeg.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/* $NetBSD: rf_pqdeg.c,v 1.5 2000/01/07 03:41:04 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <dev/raidframe/rf_archs.h>
-
-#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_pqdeg.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_pqdegdags.h>
-#include <dev/raidframe/rf_pq.h>
-
-/*
- Degraded mode dag functions for P+Q calculations.
-
- The following nomenclature is used.
-
- PQ_<D><P><Q>_Create{Large,Small}<Write|Read>DAG
-
- where <D><P><Q> are single digits representing the number of failed
- data units <D> (0,1,2), parity units <P> (0,1), and Q units <Q> (0,1), affecting
- the I/O. The reads have only PQ_<D><P><Q>_CreateReadDAG variants, while
- the single fault writes have both large and small write versions. (Single fault
- PQ is equivalent to normal mode raid 5 in many aspects.)
-
- Some versions degenerate into the same case, and are grouped together below.
-*/
-
-/* Reads, single failure
-
- we have parity, so we can do a raid 5
- reconstruct read.
-*/
-
-/*
- * PQ_100 read: hands the creation arguments unchanged to
- * rf_CreateDegradedReadDAG, selecting the P recovery functions
- * (rf_pRecoveryFuncs).
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG)
-{
- rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs);
-}
-/* Reads, double failure */
-
-/*
- Q is lost, but not parity,
- so we can do a raid 5 reconstruct read.
-
- Same call as the PQ_100 read: rf_CreateDegradedReadDAG with the
- P recovery functions (rf_pRecoveryFuncs).
-*/
-
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG)
-{
- rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs);
-}
-/*
- * PQ_110 read: parity is lost, so the data has to be rebuilt from Q.
- *
- * The generic degraded-read builder works on asmap->parityInfo, so the
- * P and Q descriptors are exchanged before it is called with the Q
- * recovery functions.  The exchange is not undone afterwards.
- */
-
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG)
-{
-    RF_PhysDiskAddr_t *const origParity = asmap->parityInfo;
-
-    /* put Q where the degraded-read code expects to find parity */
-    asmap->parityInfo = asmap->qInfo;
-    asmap->qInfo = origParity;
-
-    rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
-        &rf_qRecoveryFuncs);
-}
-/*
- Two data units are dead in this stripe, so we will need to read
- both P and Q to reconstruct the data. Note that only
- one data unit we are reading may actually be missing.
-
- The DAG itself is built by rf_PQ_DoubleDegRead; this function only
- forwards its arguments.
-*/
-RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG)
-{
- rf_PQ_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList);
-}
-/*
- * PQ_200 read (two failed data units): forwards to
- * rf_CreateDoubleDegradedReadDAG with the same arguments.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG)
-{
- rf_CreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList);
-}
-/* Writes, single failure */
-
-/*
- * PQ_100 write: one data unit has failed, P and Q are both intact.
- *
- * Panics when the access touches more than one stripe unit and the failed
- * PDA does not span a whole stripe unit.  Otherwise the DAG is built by
- * rf_CommonCreateSimpleDegradedWriteDAG with a count of 2 and
- * rf_Degraded_100_PQFunc as the redundancy function.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG)
-{
-    if (asmap->numStripeUnitsAccessed != 1) {
-        /* only look at the failed PDA once we know several SUs are hit */
-        if (asmap->failedPDAs[0]->numSector !=
-            raidPtr->Layout.sectorsPerStripeUnit) {
-            RF_PANIC();
-        }
-    }
-
-    rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp,
-        flags, allocList, 2,
-        (int (*) (RF_DagNode_t *)) rf_Degraded_100_PQFunc,
-        RF_FALSE);
-}
-/*
- * Dead P - act like a RAID 5 small write with parity = Q.
- *
- * The P and Q descriptors in the stripe map are exchanged so that the
- * common small-write builder updates Q, using the Q functions (rf_qFuncs).
- * The exchange is not undone afterwards.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG)
-{
-    RF_PhysDiskAddr_t *const deadParity = asmap->parityInfo;
-
-    /* let Q stand in for parity in the small-write code */
-    asmap->parityInfo = asmap->qInfo;
-    asmap->qInfo = deadParity;
-
-    rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
-        allocList, &rf_qFuncs, NULL);
-}
-/*
- * Dead Q - act like a RAID 5 small write.
- * Calls rf_CommonCreateSmallWriteDAG with the P functions (rf_pFuncs) and
- * NULL for the second function set.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG)
-{
- rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
- allocList, &rf_pFuncs, NULL);
-}
-/*
- * Dead P - act like a RAID 5 large write, but compute Q instead of P.
- *
- * The P and Q descriptors in the stripe map are exchanged (and left
- * exchanged) so the common large-write builder targets Q; rf_RegularQFunc
- * is passed as the redundancy function.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG)
-{
-    RF_PhysDiskAddr_t *const deadParity = asmap->parityInfo;
-
-    /* let Q stand in for parity in the large-write code */
-    asmap->parityInfo = asmap->qInfo;
-    asmap->qInfo = deadParity;
-
-    rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
-        allocList, 1, rf_RegularQFunc, RF_FALSE);
-}
-/*
- * Dead Q - act like a RAID 5 large write.
- * Calls rf_CommonCreateLargeWriteDAG with a count of 1 and rf_RegularPFunc
- * as the redundancy function.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG)
-{
- rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
- allocList, 1, rf_RegularPFunc, RF_FALSE);
-}
-
-
-/*
- * writes, double failure
- */
-
-/*
- * Lost P & Q - do a nonredundant write.
- * Forwards to rf_CreateNonRedundantWriteDAG with RF_IO_TYPE_WRITE.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG)
-{
- rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
- RF_IO_TYPE_WRITE);
-}
-/*
- In the two cases below,
- a nasty case arises when the write covers a (strict) portion of a failed
- stripe unit and parts of another su. For now, we do not support this.
-*/
-
-/*
- * Lost Data and P - do a Q write.
- *
- * Panics when the access touches more than one stripe unit and the failed
- * PDA does not span a whole stripe unit.  Otherwise P and Q are exchanged
- * in the stripe map (and left exchanged) and the simple degraded-write
- * builder is called with a count of 1 and rf_PQ_DegradedWriteQFunc.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG)
-{
-    RF_PhysDiskAddr_t *qDesc;
-
-    if (asmap->numStripeUnitsAccessed != 1) {
-        if (asmap->failedPDAs[0]->numSector !=
-            raidPtr->Layout.sectorsPerStripeUnit)
-            RF_PANIC();
-    }
-
-    /* present Q to the parity code in P's place */
-    qDesc = asmap->qInfo;
-    asmap->qInfo = asmap->parityInfo;
-    asmap->parityInfo = qDesc;
-
-    /* open question from the original author: is the regular Q func the
-     * right one to call here? */
-    rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
-        allocList, 1,
-        (int (*) (RF_DagNode_t *)) rf_PQ_DegradedWriteQFunc,
-        RF_FALSE);
-}
-/*
- * Lost Data and Q - do a degraded mode P write.
- *
- * The access is accepted when it touches exactly one stripe unit or when
- * the failed PDA spans a whole stripe unit; anything else panics.  The DAG
- * is built by the simple degraded-write builder with a count of 1 and
- * rf_RecoveryXorFunc.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG)
-{
-    if (!(asmap->numStripeUnitsAccessed == 1 ||
-        asmap->failedPDAs[0]->numSector ==
-        raidPtr->Layout.sectorsPerStripeUnit)) {
-        RF_PANIC();
-    }
-
-    rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
-        allocList, 1, rf_RecoveryXorFunc, RF_FALSE);
-}
-#endif /* (RF_INCLUDE_DECL_PQ > 0) ||
- * (RF_INCLUDE_RAID6 > 0) */
diff --git a/sys/dev/raidframe/rf_pqdeg.h b/sys/dev/raidframe/rf_pqdeg.h
deleted file mode 100644
index 83371e6..0000000
--- a/sys/dev/raidframe/rf_pqdeg.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_pqdeg.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_PQDEG_H_
-#define _RF__RF_PQDEG_H_
-
-#include <dev/raidframe/rf_types.h>
-
-#if RF_UTILITY == 0
-#include <dev/raidframe/rf_dag.h>
-
-/* extern decl's of the failure mode PQ functions.
- * See rf_pqdeg.c for nomenclature discussion.
- */
-
-/* reads, single failure */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG);
-/* reads, two failure */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG);
-
-/* writes, single failure */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG);
-
-/* writes, double failure */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG);
-#endif /* RF_UTILITY == 0 */
-
-typedef RF_uint32 RF_ua32_t[32];
-typedef RF_uint8 RF_ua1024_t[1024];
-
-extern RF_ua32_t rf_rn;
-extern RF_ua32_t rf_qfor[32];
-#ifndef _KERNEL /* we don't support PQ in the kernel yet, so
- * don't link in this monster table */
-extern RF_ua1024_t rf_qinv[29 * 29];
-#else /* _KERNEL */
-extern RF_ua1024_t rf_qinv[1];
-#endif /* !_KERNEL */
-
-#endif /* !_RF__RF_PQDEG_H_ */
diff --git a/sys/dev/raidframe/rf_pqdegdags.c b/sys/dev/raidframe/rf_pqdegdags.c
deleted file mode 100644
index 3606005..0000000
--- a/sys/dev/raidframe/rf_pqdegdags.c
+++ /dev/null
@@ -1,432 +0,0 @@
-/* $NetBSD: rf_pqdegdags.c,v 1.5 1999/08/15 02:36:40 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * rf_pqdegdags.c
- * Degraded mode dags for double fault cases.
-*/
-
-
-#include <dev/raidframe/rf_archs.h>
-
-#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_pqdegdags.h>
-#include <dev/raidframe/rf_pq.h>
-
-static void
-applyPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, RF_PhysDiskAddr_t * ppda,
- RF_PhysDiskAddr_t * qpda, void *bp);
-
-/*
- Two data drives have failed, and we are doing a read that covers one of them.
- We may also be reading some of the surviving drives.
-
-
- *****************************************************************************************
- *
- * creates a DAG to perform a degraded-mode read of data within one stripe.
- * This DAG is as follows:
- *
- * Hdr
- * |
- * Block
- * / / \ \ \ \
- * Rud ... Rud Rrd ... Rrd Rp Rq
- * | \ | \ | \ | \ | \ | \
- *
- * | |
- * Unblock X
- * \ /
- * ------ T ------
- *
- * Each R node is a successor of the L node
- * One successor arc from each R node goes to U, and the other to X
- * There is one Rud for each chunk of surviving user data requested by the user,
- * and one Rrd for each chunk of surviving user data _not_ being read by the user
- * R = read, ud = user data, rd = recovery (surviving) data, p = P data, q = Qdata
- * X = pq recovery node, T = terminate
- *
- * The block & unblock nodes are leftovers from a previous version. They
- * do nothing, but I haven't deleted them because it would be a tremendous
- * effort to put them back in.
- *
- * Note: The target buffer for the XOR node is set to the actual user buffer where the
- * failed data is supposed to end up. This buffer is zero'd by the code here. Thus,
- * if you create a degraded read dag, use it, and then re-use, you have to be sure to
- * zero the target buffer prior to the re-use.
- *
- * Every buffer read is passed to the pq recovery node, whose job it is to sort out what's
- * needed and what's not.
- ****************************************************************************************/
-/*
- * init a disk node with 2 successors and one predecessor
- *
- * Expands to several statements (not wrapped in do { } while (0)) and leaves
- * off the final semicolon, so it must be used as a plain statement inside
- * braces.  Besides its arguments, the expansion refers to dag_h, allocList,
- * unblockNode, recoveryNode and blockNode, which must exist at the point of
- * use.
- */
-#define INIT_DISK_NODE(node,name) \
-rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \
-(node)->succedents[0] = unblockNode; \
-(node)->succedents[1] = recoveryNode; \
-(node)->antecedents[0] = blockNode; \
-(node)->antType[0] = rf_control
-
-/*
- * Fill the four params of a disk node from the pda _p_: the pda itself, its
- * buffer, parityStripeID, and a param built by RF_CREATE_PARAM3.  Like
- * INIT_DISK_NODE this is a multi-statement expansion; it also picks up
- * parityStripeID and which_ru from the point of use.  Note that _node_ is
- * used with `.', i.e. it is a node object, not a pointer.
- */
-#define DISK_NODE_PARAMS(_node_,_p_) \
- (_node_).params[0].p = _p_ ; \
- (_node_).params[1].p = (_p_)->bufPtr; \
- (_node_).params[2].v = parityStripeID; \
- (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru)
-
-/* The pda stored in params[0] of a disk node (node is a pointer here). */
-#define DISK_NODE_PDA(node) ((node)->params[0].p)
-
-/*
- * Double-degraded read for P+Q: delegates to rf_DoubleDegRead, supplying
- * the node names "Rq" and "PQ Recovery" and rf_PQDoubleRecoveryFunc as the
- * recovery function.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead)
-{
- rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList,
- "Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc);
-}
-
-/*
- * Fold the part of one data PDA that overlaps the recovery range into the
- * P and Q buffers.
- *
- *   raidPtr - array the access belongs to
- *   pda     - data unit to apply; pda->bufPtr is the source data
- *   ppda    - P unit; its start sector and length define the recovery range
- *             and its buffer receives the XOR of the data
- *   qpda    - Q unit; its buffer is updated through rf_IncQ
- *   bp      - passed through to rf_bxor
- *
- * Nothing is done when pda does not intersect the range covered by ppda.
- * All offsets here (suoffset, s0off, delta, len) are in sectors; buffer
- * pointers are therefore advanced by rf_RaidAddressToByte(delta), not by a
- * stripe-unit count.
- */
-static void
-applyPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, RF_PhysDiskAddr_t *ppda,
-    RF_PhysDiskAddr_t *qpda, void *bp)
-{
-    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
-    RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
-    RF_SectorCount_t s0len = ppda->numSector, len;
-    RF_SectorNum_t suoffset;
-    unsigned coeff;
-    char *pbuf = ppda->bufPtr;
-    char *qbuf = qpda->bufPtr;
-    char *buf;
-    int delta;
-
-    suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
-    len = pda->numSector;
-    /* see if pda intersects a recovery pda */
-    if ((suoffset < s0off + s0len) && (suoffset + len > s0off)) {
-        buf = pda->bufPtr;
-        /* Q coefficient: stripe unit ID modulo the number of data columns */
-        coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
-        coeff = (coeff % layoutPtr->numDataCol);
-
-        if (suoffset < s0off) {
-            /* pda starts before the range: skip its leading sectors */
-            delta = s0off - suoffset;
-            buf += rf_RaidAddressToByte(raidPtr, delta);
-            suoffset = s0off;
-            len -= delta;
-        }
-        if (suoffset > s0off) {
-            /* pda starts inside the range: skip into P and Q instead */
-            delta = suoffset - s0off;
-            pbuf += rf_RaidAddressToByte(raidPtr, delta);
-            qbuf += rf_RaidAddressToByte(raidPtr, delta);
-        }
-        /* clip the tail to the end of the recovery range */
-        if ((suoffset + len) > (s0len + s0off))
-            len = s0len + s0off - suoffset;
-
-        /* src, dest, len */
-        rf_bxor(buf, pbuf, rf_RaidAddressToByte(raidPtr, len), bp);
-
-        /* dest, src, len, coeff */
-        rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf,
-            rf_RaidAddressToByte(raidPtr, len), coeff);
-    }
-}
-/*
- Recover data in the case of a double failure. There can be two
- result buffers, one for each chunk of data trying to be recovered.
- The params are pda's that have not been range restricted or otherwise
- politely massaged - this should be done here. The last params are the
- pdas of P and Q, followed by the raidPtr. The list can look like
-
- pda, pda, ... , p pda, q pda, raidptr, asm
-
- or
-
- pda, pda, ... , p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm
-
- depending on whether two chunks of recovery data were required.
-
- The second condition only arises if there are two failed buffers
- whose lengths do not add up to a stripe unit.
-
- Only the single-chunk form is actually supported: the two-chunk form
- trips RF_ASSERT(0) on entry and ends in RF_PANIC() because
- asmap->failedPDAs[1] is non-NULL.
-
- The time spent here is added to tracerec->q_us (when a trace record is
- present), the node is woken with status 0, and 0 is returned.
-*/
-
-
-int
-rf_PQDoubleRecoveryFunc(RF_DagNode_t *node)
-{
-    int np = node->numParams;
-    RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
-    RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
-    RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
-    int d, i;
-    unsigned coeff;
-    RF_RaidAddr_t sosAddr;
-    RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
-    int two = 0;
-    RF_PhysDiskAddr_t *ppda, *ppda2 = NULL, *qpda, *qpda2 = NULL, *pda, npda;
-    int numDataCol = layoutPtr->numDataCol;
-    RF_Etimer_t timer;
-    RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-
-    RF_ETIMER_START(timer);
-
-    /* pick P/Q out of the tail of the param list; d = number of data pdas */
-    if (asmap->failedPDAs[1] &&
-        (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
-        RF_ASSERT(0);
-        ppda = node->params[np - 6].p;
-        ppda2 = node->params[np - 5].p;
-        qpda = node->params[np - 4].p;
-        qpda2 = node->params[np - 3].p;
-        d = (np - 6);
-        two = 1;
-    } else {
-        ppda = node->params[np - 4].p;
-        qpda = node->params[np - 3].p;
-        d = (np - 4);
-    }
-
-    /* fold every data pda into the recovery buffer(s) it overlaps */
-    for (i = 0; i < d; i++) {
-        pda = node->params[i].p;
-        applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp);
-        if (two)
-            /* second recovery chunk has its own P/Q pair */
-            applyPDA(raidPtr, pda, ppda2, qpda2, node->dagHdr->bp);
-    }
-
-    /* ok, we got the parity back to the point where we can recover. We
-     * now need to determine the coeff of the columns that need to be
-     * recovered. We only need to recover a single stripe unit. */
-
-    if (asmap->failedPDAs[1] == NULL) {    /* only a single stripe unit
-                                             * to recover. */
-        pda = asmap->failedPDAs[0];
-        sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
-        /* need to determine the column of the other failed disk */
-        coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
-        /* compute the data unit offset within the column */
-        coeff = (coeff % raidPtr->Layout.numDataCol);
-        for (i = 0; i < numDataCol; i++) {
-            npda.raidAddress = sosAddr + (i * secPerSU);
-            (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
-            /* stop at the dead disk that is not the one being read */
-            if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
-                if (i != coeff)
-                    break;
-        }
-        RF_ASSERT(i < numDataCol);
-        RF_ASSERT(two == 0);
-        /* recover the data. Since we only want to recover one
-         * column, we overwrite the parity with the other one. */
-        if (coeff < i)    /* recovering 'a' */
-            rf_PQ_recover((unsigned long *) ppda->bufPtr,
-                (unsigned long *) qpda->bufPtr,
-                (unsigned long *) pda->bufPtr,
-                (unsigned long *) ppda->bufPtr,
-                rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i);
-        else              /* recovering 'b' */
-            rf_PQ_recover((unsigned long *) ppda->bufPtr,
-                (unsigned long *) qpda->bufPtr,
-                (unsigned long *) ppda->bufPtr,
-                (unsigned long *) pda->bufPtr,
-                rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff);
-    } else
-        RF_PANIC();
-
-    RF_ETIMER_STOP(timer);
-    RF_ETIMER_EVAL(timer);
-    if (tracerec)
-        tracerec->q_us += RF_ETIMER_VAL_US(timer);
-    rf_GenericWakeupFunc(node, 0);
-    return (0);
-}
-
-/*
- * DAG node function for a write in a stripe with two failed data units,
- * where the write hits only one of them (see "The situation" below).
- *
- * P and Q are taken from node->results[0] and node->results[1]; the read
- * pdas are the first numDataCol - 2 params, and raidPtr / asmap are the
- * last two params.  Statement order matters here: the P and Q buffers are
- * reused as scratch space for the recovered column.
- *
- * The time spent is added to tracerec->q_us (when a trace record is
- * present), the node is woken with status 0, and 0 is returned.
- */
-int
-rf_PQWriteDoubleRecoveryFunc(node)
- RF_DagNode_t *node;
-{
- /* The situation:
- *
- * We are doing a write that hits only one failed data unit. The other
- * failed data unit is not being overwritten, so we need to generate
- * it.
- *
- * For the moment, we assume all the nonfailed data being written is in
- * the shadow of the failed data unit. (i.e., either a single data
- * unit write or the entire failed stripe unit is being overwritten. )
- *
- * Recovery strategy: apply the recovery data to the parity and q. Use P
- * & Q to recover the second failed data unit in P. Zero fill Q, then
- * apply the recovered data to p. Then apply the data being written to
- * the failed drive. Then walk through the surviving drives, applying
- * new data when it exists, otherwise the recovery data. Quite a mess.
- *
- *
- * The params
- *
- * read pda0, read pda1, ... read pda (numDataCol-3), write pda0, ... ,
- * write pda (numStripeUnitAccess - numDataFailed), failed pda,
- * raidPtr, asmap */
-
- int np = node->numParams;
- RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
- RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
- RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
- int i;
- RF_RaidAddr_t sosAddr;
- unsigned coeff;
- RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
- RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
- int numDataCol = layoutPtr->numDataCol;
- RF_Etimer_t timer;
- RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
-
- /* only the one-failed-PDA form with a P and a Q result is handled */
- RF_ASSERT(node->numResults == 2);
- RF_ASSERT(asmap->failedPDAs[1] == NULL);
- RF_ETIMER_START(timer);
- ppda = node->results[0];
- qpda = node->results[1];
- /* apply the recovery data */
- for (i = 0; i < numDataCol - 2; i++)
- applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp);
-
- /* determine the other failed data unit */
- pda = asmap->failedPDAs[0];
- sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
- /* need to determine the column of the other failed disk */
- coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
- /* compute the data unit offset within the column */
- coeff = (coeff % raidPtr->Layout.numDataCol);
- /* walk the stripe's data units; on exit i is the index of the dead
- * unit other than coeff */
- for (i = 0; i < numDataCol; i++) {
- npda.raidAddress = sosAddr + (i * secPerSU);
- (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
- /* skip over dead disks */
- if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
- if (i != coeff)
- break;
- }
- RF_ASSERT(i < numDataCol);
- /* recover the data. The column we want to recover we write over the
- * parity. The column we don't care about we dump in q. */
- if (coeff < i) /* recovering 'a' */
- rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i);
- else /* recovering 'b' */
- rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff);
-
- /* OK. The valid data is in P. Zero fill Q, then inc it into it. */
- bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector));
- rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), i);
-
- /* now apply all the write data to the buffer */
- /* single stripe unit write case: the failed data is only thing we are
- * writing. */
- RF_ASSERT(asmap->numStripeUnitsAccessed == 1);
- /* dest, src, len, coeff */
- rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) asmap->failedPDAs[0]->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff);
- rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr, rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp);
-
- /* now apply all the recovery data */
- for (i = 0; i < numDataCol - 2; i++)
- applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp);
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- if (tracerec)
- tracerec->q_us += RF_ETIMER_VAL_US(timer);
-
- rf_GenericWakeupFunc(node, 0);
- return (0);
-}
-/*
- * Large write with two failed data units: not implemented, the body is
- * just RF_PANIC().
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite)
-{
- RF_PANIC();
-}
-/*
- Two lost data unit write case.
-
- There are really two cases here:
-
- (1) The write completely covers the two lost data units.
- In that case, a reconstruct write that doesn't write the
- failed data units will do the correct thing. So in this case,
- the dag looks like
-
- full stripe read of surviving data units (not being overwritten)
- write new data (ignoring failed units) compute P&Q
- write P&Q
-
-
- (2) The write does not completely cover both failed data units
- (but touches at least one of them). Then we need to do the
- equivalent of a reconstruct read to recover the missing data
- unit from the other stripe.
-
- For any data we are writing that is not in the "shadow"
- of the failed units, we need to do a four cycle update.
- PANIC on this case. for now
-
-*/
-
-/*
- * Dispatch a write in a stripe with lost data units, based on how many
- * failed units the access touches (asmap->numDataFailed) and how many
- * failed sectors it covers:
- *
- *   - two failed units, both covered completely: rf_PQ_DDLargeWrite
- *   - every accessed stripe unit is a failed one, or at least one stripe
- *     unit's worth of failed sectors is covered: rf_PQ_DDSimpleSmallWrite
- *   - anything else: RF_PANIC()
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG)
-{
-    RF_SectorCount_t suSectors = raidPtr->Layout.sectorsPerStripeUnit;
-    int failedUnits = asmap->numDataFailed;
-    int failedSectors = asmap->failedPDAs[0]->numSector;
-
-    if (failedUnits == 2)
-        failedSectors += asmap->failedPDAs[1]->numSector;
-
-    if ((failedUnits == 2) && (failedSectors == (2 * suSectors))) {
-        /* large write case */
-        rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList);
-    } else if ((failedUnits == asmap->numStripeUnitsAccessed) ||
-        (failedSectors >= suSectors)) {
-        /* small write case, no user data not in shadow */
-        rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList);
-    } else {
-        RF_PANIC();
-    }
-}
-/*
- * Small write with lost data units: delegates to rf_DoubleDegSmallWrite,
- * supplying the node names "Rq", "Wq" and "PQ Recovery" and
- * rf_PQWriteDoubleRecoveryFunc as the recovery function.
- */
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite)
-{
- rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc);
-}
-#endif /* (RF_INCLUDE_DECL_PQ > 0) ||
- * (RF_INCLUDE_RAID6 > 0) */
diff --git a/sys/dev/raidframe/rf_pqdegdags.h b/sys/dev/raidframe/rf_pqdegdags.h
deleted file mode 100644
index 11ce820..0000000
--- a/sys/dev/raidframe/rf_pqdegdags.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_pqdegdags.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */
-/*
- * rf_pqdegdags.h
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Daniel Stodolsky
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/*
- * rf_pqdegdags.c
- * Degraded mode dags for double fault cases.
- */
-
-#ifndef _RF__RF_PQDEGDAGS_H_
-#define _RF__RF_PQDEGDAGS_H_
-
-#include <dev/raidframe/rf_dag.h>
-
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead);
-int rf_PQDoubleRecoveryFunc(RF_DagNode_t * node);
-int rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t * node);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite);
-RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG);
-
-#endif /* !_RF__RF_PQDEGDAGS_H_ */
diff --git a/sys/dev/raidframe/rf_psstatus.c b/sys/dev/raidframe/rf_psstatus.c
deleted file mode 100644
index a6968cf..0000000
--- a/sys/dev/raidframe/rf_psstatus.c
+++ /dev/null
@@ -1,378 +0,0 @@
-/* $NetBSD: rf_psstatus.c,v 1.5 2000/01/08 22:57:31 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * psstatus.c
- *
- * The reconstruction code maintains a bunch of status related to the parity
- * stripes that are currently under reconstruction. This header file defines
- * the status structures.
- *
- *****************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_psstatus.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-#define Dprintf1(s,a) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf2(s,a,b) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf3(s,a,b,c) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
-
-static void
-RealPrintPSStatusTable(RF_Raid_t * raidPtr,
- RF_PSStatusHeader_t * pssTable);
-
-#define RF_MAX_FREE_PSS 32
-#define RF_PSS_INC 8
-#define RF_PSS_INITIAL 4
-
-static int init_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *);
-static void clean_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *);
-static void rf_ShutdownPSStatus(void *);
-
-static int
-init_pss(p, raidPtr)
- RF_ReconParityStripeStatus_t *p;
- RF_Raid_t *raidPtr;
-{
- RF_Calloc(p->issued, raidPtr->numCol, sizeof(char), (char *));
- if (p->issued == NULL)
- return (ENOMEM);
- return (0);
-}
-
-static void
-clean_pss(p, raidPtr)
- RF_ReconParityStripeStatus_t *p;
- RF_Raid_t *raidPtr;
-{
- RF_Free(p->issued, raidPtr->numCol * sizeof(char));
-}
-
-static void
-rf_ShutdownPSStatus(arg)
- void *arg;
-{
- RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
-
- RF_FREELIST_DESTROY_CLEAN_ARG(raidPtr->pss_freelist, next, (RF_ReconParityStripeStatus_t *), clean_pss, raidPtr);
-}
-
-int
-rf_ConfigurePSStatus(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- int rc;
-
- raidPtr->pssTableSize = RF_PSS_DEFAULT_TABLESIZE;
- RF_FREELIST_CREATE(raidPtr->pss_freelist, RF_MAX_FREE_PSS,
- RF_PSS_INC, sizeof(RF_ReconParityStripeStatus_t));
- if (raidPtr->pss_freelist == NULL)
- return (ENOMEM);
- rc = rf_ShutdownCreate(listp, rf_ShutdownPSStatus, raidPtr);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
- __FILE__, __LINE__, rc);
- rf_ShutdownPSStatus(raidPtr);
- return (rc);
- }
- RF_FREELIST_PRIME_INIT_ARG(raidPtr->pss_freelist, RF_PSS_INITIAL, next,
- (RF_ReconParityStripeStatus_t *), init_pss, raidPtr);
- return (0);
-}
-/*****************************************************************************************
- * sets up the pss table
- * We pre-allocate a bunch of entries to avoid as much as possible having to
- * malloc up hash chain entries.
- ****************************************************************************************/
-RF_PSStatusHeader_t *
-rf_MakeParityStripeStatusTable(raidPtr)
- RF_Raid_t *raidPtr;
-{
- RF_PSStatusHeader_t *pssTable;
- int i, j, rc;
-
- RF_Calloc(pssTable, raidPtr->pssTableSize, sizeof(RF_PSStatusHeader_t), (RF_PSStatusHeader_t *));
- for (i = 0; i < raidPtr->pssTableSize; i++) {
- rc = rf_mutex_init(&pssTable[i].mutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- /* fail and deallocate */
- for (j = 0; j < i; j++) {
- rf_mutex_destroy(&pssTable[i].mutex);
- }
- RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t));
- return (NULL);
- }
- }
- return (pssTable);
-}
-
-void
-rf_FreeParityStripeStatusTable(raidPtr, pssTable)
- RF_Raid_t *raidPtr;
- RF_PSStatusHeader_t *pssTable;
-{
- int i;
-
- if (rf_pssDebug)
- RealPrintPSStatusTable(raidPtr, pssTable);
- for (i = 0; i < raidPtr->pssTableSize; i++) {
- if (pssTable[i].chain) {
- printf("ERROR: pss hash chain not null at recon shutdown\n");
- }
- rf_mutex_destroy(&pssTable[i].mutex);
- }
- RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t));
-}
-
-
-/* looks up the status structure for a parity stripe.
- * if the create_flag is on, creates and returns the status structure it it doesn't exist
- * otherwise returns NULL if the status structure does not exist
- *
- * ASSUMES THE PSS DESCRIPTOR IS LOCKED UPON ENTRY
- */
-RF_ReconParityStripeStatus_t *
-rf_LookupRUStatus(
- RF_Raid_t * raidPtr,
- RF_PSStatusHeader_t * pssTable,
- RF_StripeNum_t psID,
- RF_ReconUnitNum_t which_ru,
- RF_PSSFlags_t flags, /* whether or not to create it if it doesn't
- * exist + what flags to set initially */
- int *created)
-{
- RF_PSStatusHeader_t *hdr = &pssTable[RF_HASH_PSID(raidPtr, psID)];
- RF_ReconParityStripeStatus_t *p, *pssPtr = hdr->chain;
-
- *created = 0;
- for (p = pssPtr; p; p = p->next) {
- if (p->parityStripeID == psID && p->which_ru == which_ru)
- break;
- }
-
- if (!p && (flags & RF_PSS_CREATE)) {
- Dprintf2("PSS: creating pss for psid %ld ru %d\n", psID, which_ru);
- p = rf_AllocPSStatus(raidPtr);
- p->next = hdr->chain;
- hdr->chain = p;
-
- p->parityStripeID = psID;
- p->which_ru = which_ru;
- p->flags = flags;
- p->rbuf = NULL;
- p->writeRbuf = NULL;
- p->blockCount = 0;
- p->procWaitList = NULL;
- p->blockWaitList = NULL;
- p->bufWaitList = NULL;
- *created = 1;
- } else
- if (p) { /* we didn't create, but we want to specify
- * some new status */
- p->flags |= flags; /* add in whatever flags we're
- * specifying */
- }
- if (p && (flags & RF_PSS_RECON_BLOCKED)) {
- p->blockCount++;/* if we're asking to block recon, bump the
- * count */
- Dprintf3("raid%d: Blocked recon on psid %ld. count now %d\n",
- raidPtr->raidid, psID, p->blockCount);
- }
- return (p);
-}
-/* deletes an entry from the parity stripe status table. typically used
- * when an entry has been allocated solely to block reconstruction, and
- * no recon was requested while recon was blocked. Assumes the hash
- * chain is ALREADY LOCKED.
- */
-void
-rf_PSStatusDelete(raidPtr, pssTable, pssPtr)
- RF_Raid_t *raidPtr;
- RF_PSStatusHeader_t *pssTable;
- RF_ReconParityStripeStatus_t *pssPtr;
-{
- RF_PSStatusHeader_t *hdr = &(pssTable[RF_HASH_PSID(raidPtr, pssPtr->parityStripeID)]);
- RF_ReconParityStripeStatus_t *p = hdr->chain, *pt = NULL;
-
- while (p) {
- if (p == pssPtr) {
- if (pt)
- pt->next = p->next;
- else
- hdr->chain = p->next;
- p->next = NULL;
- rf_FreePSStatus(raidPtr, p);
- return;
- }
- pt = p;
- p = p->next;
- }
- RF_ASSERT(0); /* we must find it here */
-}
-/* deletes an entry from the ps status table after reconstruction has completed */
-void
-rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
- RF_ReconUnitNum_t which_ru;
- RF_StripeNum_t psid;
-{
- RF_PSStatusHeader_t *hdr = &(raidPtr->reconControl[row]->pssTable[RF_HASH_PSID(raidPtr, psid)]);
- RF_ReconParityStripeStatus_t *p, *pt;
- RF_CallbackDesc_t *cb, *cb1;
-
- RF_LOCK_MUTEX(hdr->mutex);
- for (pt = NULL, p = hdr->chain; p; pt = p, p = p->next) {
- if ((p->parityStripeID == psid) && (p->which_ru == which_ru))
- break;
- }
- if (p == NULL) {
- rf_PrintPSStatusTable(raidPtr, row);
- }
- RF_ASSERT(p); /* it must be there */
-
- Dprintf2("PSS: deleting pss for psid %ld ru %d\n", psid, which_ru);
-
- /* delete this entry from the hash chain */
- if (pt)
- pt->next = p->next;
- else
- hdr->chain = p->next;
- p->next = NULL;
-
- RF_UNLOCK_MUTEX(hdr->mutex);
-
- /* wakup anyone waiting on the parity stripe ID */
- cb = p->procWaitList;
- p->procWaitList = NULL;
- while (cb) {
- Dprintf1("Waking up access waiting on parity stripe ID %ld\n", p->parityStripeID);
- cb1 = cb->next;
- (cb->callbackFunc) (cb->callbackArg);
-
- /* THIS IS WHAT THE ORIGINAL CODE HAD... the extra 0 is bogus,
- * IMHO */
- /* (cb->callbackFunc)(cb->callbackArg, 0); */
- rf_FreeCallbackDesc(cb);
- cb = cb1;
- }
-
- rf_FreePSStatus(raidPtr, p);
-}
-
-RF_ReconParityStripeStatus_t *
-rf_AllocPSStatus(raidPtr)
- RF_Raid_t *raidPtr;
-{
- RF_ReconParityStripeStatus_t *p;
-
- RF_FREELIST_GET_INIT_ARG(raidPtr->pss_freelist, p, next, (RF_ReconParityStripeStatus_t *), init_pss, raidPtr);
- if (p) {
- bzero(p->issued, raidPtr->numCol);
- }
- p->next = NULL;
- /* no need to initialize here b/c the only place we're called from is
- * the above Lookup */
- return (p);
-}
-
-void
-rf_FreePSStatus(raidPtr, p)
- RF_Raid_t *raidPtr;
- RF_ReconParityStripeStatus_t *p;
-{
- RF_ASSERT(p->procWaitList == NULL);
- RF_ASSERT(p->blockWaitList == NULL);
- RF_ASSERT(p->bufWaitList == NULL);
-
- RF_FREELIST_FREE_CLEAN_ARG(raidPtr->pss_freelist, p, next, clean_pss, raidPtr);
-}
-
-static void
-RealPrintPSStatusTable(raidPtr, pssTable)
- RF_Raid_t *raidPtr;
- RF_PSStatusHeader_t *pssTable;
-{
- int i, j, procsWaiting, blocksWaiting, bufsWaiting;
- RF_ReconParityStripeStatus_t *p;
- RF_CallbackDesc_t *cb;
-
- printf("\nParity Stripe Status Table\n");
- for (i = 0; i < raidPtr->pssTableSize; i++) {
- for (p = pssTable[i].chain; p; p = p->next) {
- procsWaiting = blocksWaiting = bufsWaiting = 0;
- for (cb = p->procWaitList; cb; cb = cb->next)
- procsWaiting++;
- for (cb = p->blockWaitList; cb; cb = cb->next)
- blocksWaiting++;
- for (cb = p->bufWaitList; cb; cb = cb->next)
- bufsWaiting++;
- printf("PSID %ld RU %d : blockCount %d %d/%d/%d proc/block/buf waiting, issued ",
- (long) p->parityStripeID, p->which_ru, p->blockCount, procsWaiting, blocksWaiting, bufsWaiting);
- for (j = 0; j < raidPtr->numCol; j++)
- printf("%c", (p->issued[j]) ? '1' : '0');
- if (!p->flags)
- printf(" flags: (none)");
- else {
- if (p->flags & RF_PSS_UNDER_RECON)
- printf(" under-recon");
- if (p->flags & RF_PSS_FORCED_ON_WRITE)
- printf(" forced-w");
- if (p->flags & RF_PSS_FORCED_ON_READ)
- printf(" forced-r");
- if (p->flags & RF_PSS_RECON_BLOCKED)
- printf(" blocked");
- if (p->flags & RF_PSS_BUFFERWAIT)
- printf(" bufwait");
- }
- printf("\n");
- }
- }
-}
-
-void
-rf_PrintPSStatusTable(raidPtr, row)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
-{
- RF_PSStatusHeader_t *pssTable = raidPtr->reconControl[row]->pssTable;
- RealPrintPSStatusTable(raidPtr, pssTable);
-}
diff --git a/sys/dev/raidframe/rf_psstatus.h b/sys/dev/raidframe/rf_psstatus.h
deleted file mode 100644
index c836d49..0000000
--- a/sys/dev/raidframe/rf_psstatus.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_psstatus.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * psstatus.h
- *
- * The reconstruction code maintains a bunch of status related to the parity
- * stripes that are currently under reconstruction. This header file defines
- * the status structures.
- *
- *****************************************************************************/
-
-#ifndef _RF__RF_PSSTATUS_H_
-#define _RF__RF_PSSTATUS_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_callback.h>
-
-#define RF_PS_MAX_BUFS 10 /* max number of bufs we'll accumulate before
- * we do an XOR */
-
-#define RF_PSS_DEFAULT_TABLESIZE 200
-
-/*
- * Macros to acquire/release the mutex lock on a parity stripe status
- * descriptor. Note that we use just one lock for the whole hash chain.
- */
-#define RF_HASH_PSID(_raid_,_psid_) ( (_psid_) % ((_raid_)->pssTableSize) ) /* simple hash function */
-#define RF_LOCK_PSS_MUTEX(_raidPtr, _row, _psid) \
- RF_LOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex)
-#define RF_UNLOCK_PSS_MUTEX(_raidPtr, _row, _psid) \
- RF_UNLOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex)
-
-struct RF_ReconParityStripeStatus_s {
- RF_StripeNum_t parityStripeID; /* the parity stripe ID */
- RF_ReconUnitNum_t which_ru; /* which reconstruction unit with the
- * indicated parity stripe */
- RF_PSSFlags_t flags; /* flags indicating various conditions */
- void *rbuf; /* this is the accumulating xor sum */
- void *writeRbuf; /* DEBUG ONLY: a pointer to the rbuf after it
- * has filled & been sent to disk */
- void *rbufsForXor[RF_PS_MAX_BUFS]; /* these are buffers still to
- * be xored into the
- * accumulating sum */
- int xorBufCount; /* num buffers waiting to be xored */
- int blockCount; /* count of # proc that have blocked recon on
- * this parity stripe */
- char *issued; /* issued[i]==1 <=> column i has already
- * issued a read request for the indicated RU */
- RF_CallbackDesc_t *procWaitList; /* list of user procs waiting
- * for recon to be done */
- RF_CallbackDesc_t *blockWaitList; /* list of disks blocked
- * waiting for user write to
- * complete */
- RF_CallbackDesc_t *bufWaitList; /* list of disks blocked waiting to
- * acquire a buffer for this RU */
- RF_ReconParityStripeStatus_t *next;
-};
-
-struct RF_PSStatusHeader_s {
- RF_DECLARE_MUTEX(mutex) /* mutex for this hash chain */
- RF_ReconParityStripeStatus_t *chain; /* the hash chain */
-};
-/* masks for the "flags" field above */
-#define RF_PSS_NONE 0x00000000 /* no flags */
-#define RF_PSS_UNDER_RECON 0x00000001 /* this parity stripe is
- * currently under
- * reconstruction */
-#define RF_PSS_FORCED_ON_WRITE 0x00000002 /* indicates a recon was
- * forced due to a user-write
- * operation */
-#define RF_PSS_FORCED_ON_READ 0x00000004 /* ditto for read, but not
- * currently implemented */
-#define RF_PSS_RECON_BLOCKED 0x00000008 /* reconstruction is currently
- * blocked due to a pending
- * user I/O */
-#define RF_PSS_CREATE 0x00000010 /* tells LookupRUStatus to
- * create the entry */
-#define RF_PSS_BUFFERWAIT 0x00000020 /* someone is waiting for a
- * buffer for this RU */
-
-int
-rf_ConfigurePSStatus(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-
-RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(RF_Raid_t * raidPtr);
-void
-rf_FreeParityStripeStatusTable(RF_Raid_t * raidPtr,
- RF_PSStatusHeader_t * pssTable);
-RF_ReconParityStripeStatus_t *
-rf_LookupRUStatus(RF_Raid_t * raidPtr,
- RF_PSStatusHeader_t * pssTable, RF_StripeNum_t psID,
- RF_ReconUnitNum_t which_ru, RF_PSSFlags_t flags, int *created);
-void
-rf_PSStatusDelete(RF_Raid_t * raidPtr, RF_PSStatusHeader_t * pssTable,
- RF_ReconParityStripeStatus_t * pssPtr);
-void
-rf_RemoveFromActiveReconTable(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru);
-RF_ReconParityStripeStatus_t *rf_AllocPSStatus(RF_Raid_t * raidPtr);
-void rf_FreePSStatus(RF_Raid_t * raidPtr, RF_ReconParityStripeStatus_t * p);
-void rf_PrintPSStatusTable(RF_Raid_t * raidPtr, RF_RowCol_t row);
-
-#endif /* !_RF__RF_PSSTATUS_H_ */
diff --git a/sys/dev/raidframe/rf_raid.h b/sys/dev/raidframe/rf_raid.h
deleted file mode 100644
index e91a2ae..0000000
--- a/sys/dev/raidframe/rf_raid.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/**********************************************
- * rf_raid.h -- main header file for RAID driver
- **********************************************/
-
-
-#ifndef _RF__RF_RAID_H_
-#define _RF__RF_RAID_H_
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-
-#include <dev/raidframe/rf_bsd.h>
-
-#include <sys/disklabel.h>
-#include <sys/types.h>
-
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_stripelocks.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_disks.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_reconstruct.h>
-#include <dev/raidframe/rf_acctrace.h>
-
-#if RF_INCLUDE_PARITYLOGGING > 0
-#include <dev/raidframe/rf_paritylog.h>
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
-
-#define RF_MAX_DISKS 128 /* max disks per array */
-#if defined(__NetBSD__)
-#define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev))
-#endif
-
-#define RF_COMPONENT_LABEL_VERSION_1 1
-#define RF_COMPONENT_LABEL_VERSION 2
-#define RF_RAID_DIRTY 0
-#define RF_RAID_CLEAN 1
-
-
-/*
- * Each row in the array is a distinct parity group, so
- * each has it's own status, which is one of the following.
- */
-typedef enum RF_RowStatus_e {
- rf_rs_optimal,
- rf_rs_degraded,
- rf_rs_reconstructing,
- rf_rs_reconfigured
-} RF_RowStatus_t;
-
-struct RF_CumulativeStats_s {
- struct timeval start; /* the time when the stats were last started */
- struct timeval stop; /* the time when the stats were last stopped */
- long sum_io_us; /* sum of all user response times (us) */
- long num_ios; /* total number of I/Os serviced */
- long num_sect_moved; /* total number of sectors read or written */
-};
-
-struct RF_ThroughputStats_s {
- RF_DECLARE_MUTEX(mutex) /* a mutex used to lock the configuration
- * stuff */
- struct timeval start; /* timer started when numOutstandingRequests
- * moves from 0 to 1 */
- struct timeval stop; /* timer stopped when numOutstandingRequests
- * moves from 1 to 0 */
- RF_uint64 sum_io_us; /* total time timer is enabled */
- RF_uint64 num_ios; /* total number of ios processed by RAIDframe */
- long num_out_ios; /* number of outstanding ios */
-};
-
-struct RF_Raid_s {
- /* This portion never changes, and can be accessed without locking */
- /* an exception is Disks[][].status, which requires locking when it is
- * changed. XXX this is no longer true. numSpare and friends can
- * change now.
- */
- u_int numRow; /* number of rows of disks, typically == # of
- * ranks */
- u_int numCol; /* number of columns of disks, typically == #
- * of disks/rank */
- u_int numSpare; /* number of spare disks */
- int maxQueueDepth; /* max disk queue depth */
- RF_SectorCount_t totalSectors; /* total number of sectors in the
- * array */
- RF_SectorCount_t sectorsPerDisk; /* number of sectors on each
- * disk */
- u_int logBytesPerSector; /* base-2 log of the number of bytes
- * in a sector */
- u_int bytesPerSector; /* bytes in a sector */
- RF_int32 sectorMask; /* mask of bytes-per-sector */
-
- RF_RaidLayout_t Layout; /* all information related to layout */
- RF_RaidDisk_t **Disks; /* all information related to physical disks */
- RF_DiskQueue_t **Queues;/* all information related to disk queues */
- RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the
- component queues. */
- /* NOTE: This is an anchor point via which the queues can be
- * accessed, but the enqueue/dequeue routines in diskqueue.c use a
- * local copy of this pointer for the actual accesses. */
- /* The remainder of the structure can change, and therefore requires
- * locking on reads and updates */
- RF_DECLARE_MUTEX(mutex) /* mutex used to serialize access to
- * the fields below */
- RF_RowStatus_t *status; /* the status of each row in the array */
- int valid; /* indicates successful configuration */
- RF_LockTableEntry_t *lockTable; /* stripe-lock table */
- RF_LockTableEntry_t *quiesceLock; /* quiesnce table */
- int numFailures; /* total number of failures in the array */
- int numNewFailures; /* number of *new* failures (that havn't
- caused a mod_counter update */
-
- int parity_good; /* !0 if parity is known to be correct */
- int serial_number; /* a "serial number" for this set */
- int mod_counter; /* modification counter for component labels */
- int clean; /* the clean bit for this array. */
-
- int openings; /* Number of IO's which can be scheduled
- simultaneously (high-level - not a
- per-component limit)*/
-
- int maxOutstanding; /* maxOutstanding requests (per-component) */
- int autoconfigure; /* automatically configure this RAID set.
- 0 == no, 1 == yes */
- int root_partition; /* Use this set as /
- 0 == no, 1 == yes*/
- int last_unit; /* last unit number (e.g. 0 for /dev/raid0)
- of this component. Used for autoconfigure
- only. */
- int config_order; /* 0 .. n. The order in which the component
- should be auto-configured. E.g. 0 is will
- done first, (and would become raid0).
- This may be in conflict with last_unit!!?! */
- /* Not currently used. */
-
- /*
- * Cleanup stuff
- */
- RF_ShutdownList_t *shutdownList; /* shutdown activities */
- RF_AllocListElem_t *cleanupList; /* memory to be freed at
- * shutdown time */
-
- /*
- * Recon stuff
- */
- RF_HeadSepLimit_t headSepLimit;
- int numFloatingReconBufs;
- int reconInProgress;
- RF_DECLARE_COND(waitForReconCond)
- RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */
- RF_ReconCtrl_t **reconControl; /* reconstruction control structure
- * pointers for each row in the array */
-
- /*
- * Array-quiescence stuff
- */
- RF_DECLARE_MUTEX(access_suspend_mutex)
- RF_DECLARE_COND(quiescent_cond)
- RF_IoCount_t accesses_suspended;
- RF_IoCount_t accs_in_flight;
- int access_suspend_release;
- int waiting_for_quiescence;
- RF_CallbackDesc_t *quiesce_wait_list;
-
- /*
- * Statistics
- */
-#if !defined(_KERNEL) && !defined(SIMULATE)
- RF_ThroughputStats_t throughputstats;
-#endif /* !KERNEL && !SIMULATE */
- RF_CumulativeStats_t userstats;
- int parity_rewrite_stripes_done;
- int recon_stripes_done;
- int copyback_stripes_done;
-
- int recon_in_progress;
- int parity_rewrite_in_progress;
- int copyback_in_progress;
-
- /*
- * Engine thread control
- */
- RF_DECLARE_MUTEX(node_queue_mutex)
- RF_DECLARE_COND(node_queue_cond)
- RF_DagNode_t *node_queue;
- RF_Thread_t parity_rewrite_thread;
- RF_Thread_t copyback_thread;
- RF_Thread_t engine_thread;
- RF_Thread_t recon_thread;
- RF_ThreadGroup_t engine_tg;
- int shutdown_engine;
- int dags_in_flight; /* debug */
-
- /*
- * PSS (Parity Stripe Status) stuff
- */
- RF_FreeList_t *pss_freelist;
- long pssTableSize;
-
- /*
- * Reconstruction stuff
- */
- int procsInBufWait;
- int numFullReconBuffers;
- RF_AccTraceEntry_t *recon_tracerecs;
- unsigned long accumXorTimeUs;
- RF_ReconDoneProc_t *recon_done_procs;
- RF_DECLARE_MUTEX(recon_done_proc_mutex)
- /*
- * nAccOutstanding, waitShutdown protected by desc freelist lock
- * (This may seem strange, since that's a central serialization point
- * for a per-array piece of data, but otherwise, it'd be an extra
- * per-array lock, and that'd only be less efficient...)
- */
- RF_DECLARE_COND(outstandingCond)
- int waitShutdown;
- int nAccOutstanding;
-
- RF_DiskId_t **diskids;
- RF_DiskId_t *sparediskids;
-
- int raidid;
- RF_AccTotals_t acc_totals;
- int keep_acc_totals;
-
- struct raidcinfo **raid_cinfo; /* array of component info */
-
- int terminate_disk_queues;
-
- /*
- * XXX
- *
- * config-specific information should be moved
- * somewhere else, or at least hung off this
- * in some generic way
- */
-
- /* used by rf_compute_workload_shift */
- RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL];
-
- /* used by declustering */
- int noRotate;
-
-#if RF_INCLUDE_PARITYLOGGING > 0
- /* used by parity logging */
- RF_SectorCount_t regionLogCapacity;
- RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */
- RF_RegionInfo_t *regionInfo; /* array of region state */
- int numParityLogs;
- int numSectorsPerLog;
- int regionParityRange;
- int logsInUse; /* debugging */
- RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity
- * logging disk work */
- RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding
- * region log */
- RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding
- * parity */
- caddr_t parityLogBufferHeap; /* pool of unused parity logs */
- RF_Thread_t pLogDiskThreadHandle;
-
-#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
- /* Point back to the softc for this device. This is needed to rid
- * ourselves of the ugly static device arrays.
- * XXX Will this affect compatibility with NetBSD?
- */
- void *sc;
-};
-#endif /* !_RF__RF_RAID_H_ */
diff --git a/sys/dev/raidframe/rf_raid0.c b/sys/dev/raidframe/rf_raid0.c
deleted file mode 100644
index 5eefabb..0000000
--- a/sys/dev/raidframe/rf_raid0.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/* $NetBSD: rf_raid0.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/***************************************
- *
- * rf_raid0.c -- implements RAID Level 0
- *
- ***************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_raid0.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_parityscan.h>
-
-typedef struct RF_Raid0ConfigInfo_s {
- RF_RowCol_t *stripeIdentifier;
-} RF_Raid0ConfigInfo_t;
-
-int
-rf_ConfigureRAID0(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_Raid0ConfigInfo_t *info;
- RF_RowCol_t i;
-
- /* create a RAID level 0 configuration structure */
- RF_MallocAndAdd(info, sizeof(RF_Raid0ConfigInfo_t), (RF_Raid0ConfigInfo_t *), raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
-
- RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList);
- if (info->stripeIdentifier == NULL)
- return (ENOMEM);
- for (i = 0; i < raidPtr->numCol; i++)
- info->stripeIdentifier[i] = i;
-
- RF_ASSERT(raidPtr->numRow == 1);
- raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * raidPtr->numCol * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
- layoutPtr->dataSectorsPerStripe = raidPtr->numCol * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = raidPtr->numCol;
- layoutPtr->numParityCol = 0;
- return (0);
-}
-
-void
-rf_MapSectorRAID0(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- *row = 0;
- *col = SUID % raidPtr->numCol;
- *diskSector = (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-void
-rf_MapParityRAID0(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- *row = *col = 0;
- *diskSector = 0;
-}
-
-void
-rf_IdentifyStripeRAID0(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_Raid0ConfigInfo_t *info;
-
- info = raidPtr->Layout.layoutSpecificInfo;
- *diskids = info->stripeIdentifier;
- *outRow = 0;
-}
-
-void
-rf_MapSIDToPSIDRAID0(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
-
-void
-rf_RAID0DagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr * createFunc)
-{
- *createFunc = ((type == RF_IO_TYPE_READ) ?
- (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRAID0WriteDAG);
-}
-
-int
-rf_VerifyParityRAID0(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidAddr,
- RF_PhysDiskAddr_t * parityPDA,
- int correct_it,
- RF_RaidAccessFlags_t flags)
-{
- /*
- * No parity is always okay.
- */
- return (RF_PARITY_OKAY);
-}
diff --git a/sys/dev/raidframe/rf_raid0.h b/sys/dev/raidframe/rf_raid0.h
deleted file mode 100644
index 36aae81..0000000
--- a/sys/dev/raidframe/rf_raid0.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_raid0.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_raid0.h - header file for RAID Level 0 */
-
-#ifndef _RF__RF_RAID0_H_
-#define _RF__RF_RAID0_H_
-
-int
-rf_ConfigureRAID0(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-void
-rf_MapSectorRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDRAID0(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-void
-rf_RAID0DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc);
-int
-rf_VerifyParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr,
- RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
-
-#endif /* !_RF__RF_RAID0_H_ */
diff --git a/sys/dev/raidframe/rf_raid1.c b/sys/dev/raidframe/rf_raid1.c
deleted file mode 100644
index 845e316..0000000
--- a/sys/dev/raidframe/rf_raid1.c
+++ /dev/null
@@ -1,691 +0,0 @@
-/* $NetBSD: rf_raid1.c,v 1.5 2000/01/08 22:57:30 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * rf_raid1.c -- implements RAID Level 1
- *
- *****************************************************************************/
-
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_raid1.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_parityscan.h>
-#include <dev/raidframe/rf_mcpair.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_reconbuffer.h>
-#include <dev/raidframe/rf_kintf.h>
-
-typedef struct RF_Raid1ConfigInfo_s {
- RF_RowCol_t **stripeIdentifier;
-} RF_Raid1ConfigInfo_t;
-/* start of day code specific to RAID level 1 */
-int
-rf_ConfigureRAID1(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_Raid1ConfigInfo_t *info;
- RF_RowCol_t i;
-
- /* create a RAID level 1 configuration structure */
- RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
-
- /* ... and fill it in. */
- info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList);
- if (info->stripeIdentifier == NULL)
- return (ENOMEM);
- for (i = 0; i < (raidPtr->numCol / 2); i++) {
- info->stripeIdentifier[i][0] = (2 * i);
- info->stripeIdentifier[i][1] = (2 * i) + 1;
- }
-
- RF_ASSERT(raidPtr->numRow == 1);
-
- /* this implementation of RAID level 1 uses one row of numCol disks
- * and allows multiple (numCol / 2) stripes per row. A stripe
- * consists of a single data unit and a single parity (mirror) unit.
- * stripe id = raidAddr / stripeUnitSize */
- raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2);
- layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = 1;
- layoutPtr->numParityCol = 1;
- return (0);
-}
-
-
-/* returns the physical disk location of the primary copy in the mirror pair */
-void
-rf_MapSectorRAID1(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
-
- *row = 0;
- *col = 2 * mirrorPair;
- *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-
-/* Map Parity
- *
- * returns the physical disk location of the secondary copy in the mirror
- * pair
- */
-void
-rf_MapParityRAID1(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
-
- *row = 0;
- *col = (2 * mirrorPair) + 1;
-
- *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-
-/* IdentifyStripeRAID1
- *
- * returns a list of disks for a given redundancy group
- */
-void
-rf_IdentifyStripeRAID1(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
- RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
- RF_ASSERT(stripeID >= 0);
- RF_ASSERT(addr >= 0);
- *outRow = 0;
- *diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)];
- RF_ASSERT(*diskids);
-}
-
-
-/* MapSIDToPSIDRAID1
- *
- * maps a logical stripe to a stripe in the redundant array
- */
-void
-rf_MapSIDToPSIDRAID1(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
-
-
-
-/******************************************************************************
- * select a graph to perform a single-stripe access
- *
- * Parameters: raidPtr - description of the physical array
- * type - type of operation (read or write) requested
- * asmap - logical & physical addresses for this access
- * createFunc - name of function to use to create the graph
- *****************************************************************************/
-
-void
-rf_RAID1DagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr * createFunc)
-{
- RF_RowCol_t frow, fcol, or, oc;
- RF_PhysDiskAddr_t *failedPDA;
- int prior_recon;
- RF_RowStatus_t rstat;
- RF_SectorNum_t oo;
-
-
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
-
- if (asmap->numDataFailed + asmap->numParityFailed > 1) {
- RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
- *createFunc = NULL;
- return;
- }
- if (asmap->numDataFailed + asmap->numParityFailed) {
- /*
- * We've got a fault. Re-map to spare space, iff applicable.
- * Shouldn't the arch-independent code do this for us?
- * Anyway, it turns out if we don't do this here, then when
- * we're reconstructing, writes go only to the surviving
- * original disk, and aren't reflected on the reconstructed
- * spare. Oops. --jimz
- */
- failedPDA = asmap->failedPDAs[0];
- frow = failedPDA->row;
- fcol = failedPDA->col;
- rstat = raidPtr->status[frow];
- prior_recon = (rstat == rf_rs_reconfigured) || (
- (rstat == rf_rs_reconstructing) ?
- rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
- );
- if (prior_recon) {
- or = frow;
- oc = fcol;
- oo = failedPDA->startSector;
- /*
- * If we did distributed sparing, we'd monkey with that here.
- * But we don't, so we'll
- */
- failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
- failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
- /*
- * Redirect other components, iff necessary. This looks
- * pretty suspicious to me, but it's what the raid5
- * DAG select does.
- */
- if (asmap->parityInfo->next) {
- if (failedPDA == asmap->parityInfo) {
- failedPDA->next->row = failedPDA->row;
- failedPDA->next->col = failedPDA->col;
- } else {
- if (failedPDA == asmap->parityInfo->next) {
- asmap->parityInfo->row = failedPDA->row;
- asmap->parityInfo->col = failedPDA->col;
- }
- }
- }
- if (rf_dagDebug || rf_mapDebug) {
- printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
- raidPtr->raidid, type, or, oc,
- (long) oo, failedPDA->row,
- failedPDA->col,
- (long) failedPDA->startSector);
- }
- asmap->numDataFailed = asmap->numParityFailed = 0;
- }
- }
- if (type == RF_IO_TYPE_READ) {
- if (asmap->numDataFailed == 0)
- *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG;
- } else {
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
- }
-}
-
-int
-rf_VerifyParityRAID1(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidAddr,
- RF_PhysDiskAddr_t * parityPDA,
- int correct_it,
- RF_RaidAccessFlags_t flags)
-{
- int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs;
- RF_DagNode_t *blockNode, *unblockNode, *wrBlock;
- RF_DagHeader_t *rd_dag_h, *wr_dag_h;
- RF_AccessStripeMapHeader_t *asm_h;
- RF_AllocListElem_t *allocList;
- RF_AccTraceEntry_t tracerec;
- RF_ReconUnitNum_t which_ru;
- RF_RaidLayout_t *layoutPtr;
- RF_AccessStripeMap_t *aasm;
- RF_SectorCount_t nsector;
- RF_RaidAddr_t startAddr;
- char *buf, *buf1, *buf2;
- RF_PhysDiskAddr_t *pda;
- RF_StripeNum_t psID;
- RF_MCPair_t *mcpair;
-
- layoutPtr = &raidPtr->Layout;
- startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
- nsector = parityPDA->numSector;
- nbytes = rf_RaidAddressToByte(raidPtr, nsector);
- psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
-
- asm_h = NULL;
- rd_dag_h = wr_dag_h = NULL;
- mcpair = NULL;
-
- ret = RF_PARITY_COULD_NOT_VERIFY;
-
- rf_MakeAllocList(allocList);
- if (allocList == NULL)
- return (RF_PARITY_COULD_NOT_VERIFY);
- mcpair = rf_AllocMCPair();
- if (mcpair == NULL)
- goto done;
- RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
- stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
- bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol);
- RF_MallocAndAdd(buf, bcount, (char *), allocList);
- if (buf == NULL)
- goto done;
- if (rf_verifyParityDebug) {
- printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n",
- raidPtr->raidid, (long) buf, bcount, (long) buf,
- (long) buf + bcount);
- }
- /*
- * Generate a DAG which will read the entire stripe- then we can
- * just compare data chunks versus "parity" chunks.
- */
-
- rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf,
- rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
- RF_IO_NORMAL_PRIORITY);
- if (rd_dag_h == NULL)
- goto done;
- blockNode = rd_dag_h->succedents[0];
- unblockNode = blockNode->succedents[0]->succedents[0];
-
- /*
- * Map the access to physical disk addresses (PDAs)- this will
- * get us both a list of data addresses, and "parity" addresses
- * (which are really mirror copies).
- */
- asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe,
- buf, RF_DONT_REMAP);
- aasm = asm_h->stripeMap;
-
- buf1 = buf;
- /*
- * Loop through the data blocks, setting up read nodes for each.
- */
- for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
- RF_ASSERT(pda);
-
- rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
-
- RF_ASSERT(pda->numSector != 0);
- if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
- /* cannot verify parity with dead disk */
- goto done;
- }
- pda->bufPtr = buf1;
- blockNode->succedents[i]->params[0].p = pda;
- blockNode->succedents[i]->params[1].p = buf1;
- blockNode->succedents[i]->params[2].v = psID;
- blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- buf1 += nbytes;
- }
- RF_ASSERT(pda == NULL);
- /*
- * keep i, buf1 running
- *
- * Loop through parity blocks, setting up read nodes for each.
- */
- for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) {
- RF_ASSERT(pda);
- rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
- RF_ASSERT(pda->numSector != 0);
- if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
- /* cannot verify parity with dead disk */
- goto done;
- }
- pda->bufPtr = buf1;
- blockNode->succedents[i]->params[0].p = pda;
- blockNode->succedents[i]->params[1].p = buf1;
- blockNode->succedents[i]->params[2].v = psID;
- blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- buf1 += nbytes;
- }
- RF_ASSERT(pda == NULL);
-
- bzero((char *) &tracerec, sizeof(tracerec));
- rd_dag_h->tracerec = &tracerec;
-
- if (rf_verifyParityDebug > 1) {
- printf("raid%d: RAID1 parity verify read dag:\n",
- raidPtr->raidid);
- rf_PrintDAGList(rd_dag_h);
- }
- RF_LOCK_MUTEX(mcpair->mutex);
- mcpair->flag = 0;
- rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) mcpair);
- while (mcpair->flag == 0) {
- RF_WAIT_MCPAIR(mcpair);
- }
- RF_UNLOCK_MUTEX(mcpair->mutex);
-
- if (rd_dag_h->status != rf_enable) {
- RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n");
- ret = RF_PARITY_COULD_NOT_VERIFY;
- goto done;
- }
- /*
- * buf1 is the beginning of the data blocks chunk
- * buf2 is the beginning of the parity blocks chunk
- */
- buf1 = buf;
- buf2 = buf + (nbytes * layoutPtr->numDataCol);
- ret = RF_PARITY_OKAY;
- /*
- * bbufs is "bad bufs"- an array whose entries are the data
- * column numbers where we had miscompares. (That is, column 0
- * and column 1 of the array are mirror copies, and are considered
- * "data column 0" for this purpose).
- */
- RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *),
- allocList);
- nbad = 0;
- /*
- * Check data vs "parity" (mirror copy).
- */
- for (i = 0; i < layoutPtr->numDataCol; i++) {
- if (rf_verifyParityDebug) {
- printf("raid%d: RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n",
- raidPtr->raidid, nbytes, i, (long) buf1,
- (long) buf2, (long) buf);
- }
- ret = bcmp(buf1, buf2, nbytes);
- if (ret) {
- if (rf_verifyParityDebug > 1) {
- for (j = 0; j < nbytes; j++) {
- if (buf1[j] != buf2[j])
- break;
- }
- printf("psid=%ld j=%d\n", (long) psID, j);
- printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff,
- buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff);
- printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff,
- buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff);
- }
- if (rf_verifyParityDebug) {
- printf("raid%d: RAID1: found bad parity, i=%d\n", raidPtr->raidid, i);
- }
- /*
- * Parity is bad. Keep track of which columns were bad.
- */
- if (bbufs)
- bbufs[nbad] = i;
- nbad++;
- ret = RF_PARITY_BAD;
- }
- buf1 += nbytes;
- buf2 += nbytes;
- }
-
- if ((ret != RF_PARITY_OKAY) && correct_it) {
- ret = RF_PARITY_COULD_NOT_CORRECT;
- if (rf_verifyParityDebug) {
- printf("raid%d: RAID1 parity verify: parity not correct\n", raidPtr->raidid);
- }
- if (bbufs == NULL)
- goto done;
- /*
- * Make a DAG with one write node for each bad unit. We'll simply
- * write the contents of the data unit onto the parity unit for
- * correction. (It's possible that the mirror copy was the correct
- * copy, and that we're spooging good data by writing bad over it,
- * but there's no way we can know that.
- */
- wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf,
- rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags,
- RF_IO_NORMAL_PRIORITY);
- if (wr_dag_h == NULL)
- goto done;
- wrBlock = wr_dag_h->succedents[0];
- /*
- * Fill in a write node for each bad compare.
- */
- for (i = 0; i < nbad; i++) {
- j = i + layoutPtr->numDataCol;
- pda = blockNode->succedents[j]->params[0].p;
- pda->bufPtr = blockNode->succedents[i]->params[1].p;
- wrBlock->succedents[i]->params[0].p = pda;
- wrBlock->succedents[i]->params[1].p = pda->bufPtr;
- wrBlock->succedents[i]->params[2].v = psID;
- wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
- }
- bzero((char *) &tracerec, sizeof(tracerec));
- wr_dag_h->tracerec = &tracerec;
- if (rf_verifyParityDebug > 1) {
- printf("Parity verify write dag:\n");
- rf_PrintDAGList(wr_dag_h);
- }
- RF_LOCK_MUTEX(mcpair->mutex);
- mcpair->flag = 0;
- /* fire off the write DAG */
- rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
- (void *) mcpair);
- while (!mcpair->flag) {
- RF_WAIT_COND(mcpair->cond, mcpair->mutex);
- }
- RF_UNLOCK_MUTEX(mcpair->mutex);
- if (wr_dag_h->status != rf_enable) {
- RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n");
- goto done;
- }
- ret = RF_PARITY_CORRECTED;
- }
-done:
- /*
- * All done. We might've gotten here without doing part of the function,
- * so cleanup what we have to and return our running status.
- */
- if (asm_h)
- rf_FreeAccessStripeMap(asm_h);
- if (rd_dag_h)
- rf_FreeDAG(rd_dag_h);
- if (wr_dag_h)
- rf_FreeDAG(wr_dag_h);
- if (mcpair)
- rf_FreeMCPair(mcpair);
- rf_FreeAllocList(allocList);
- if (rf_verifyParityDebug) {
- printf("raid%d: RAID1 parity verify, returning %d\n",
- raidPtr->raidid, ret);
- }
- return (ret);
-}
-
-int
-rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed)
- RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
- int keep_it; /* whether we can keep this buffer or we have
- * to return it */
- int use_committed; /* whether to use a committed or an available
- * recon buffer */
-{
- RF_ReconParityStripeStatus_t *pssPtr;
- RF_ReconCtrl_t *reconCtrlPtr;
- RF_RaidLayout_t *layoutPtr;
- int retcode, created;
- RF_CallbackDesc_t *cb, *p;
- RF_ReconBuffer_t *t;
- RF_Raid_t *raidPtr;
- caddr_t ta;
-
- retcode = 0;
- created = 0;
-
- raidPtr = rbuf->raidPtr;
- layoutPtr = &raidPtr->Layout;
- reconCtrlPtr = raidPtr->reconControl[rbuf->row];
-
- RF_ASSERT(rbuf);
- RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
-
- if (rf_reconbufferDebug) {
- printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n",
- raidPtr->raidid, rbuf->row, rbuf->col,
- (long) rbuf->parityStripeID, rbuf->which_ru,
- (long) rbuf->failedDiskSectorOffset);
- }
- if (rf_reconDebug) {
- printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
- (long) rbuf->parityStripeID, (long) rbuf->buffer);
- printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n",
- (long) rbuf->parityStripeID,
- rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3],
- rbuf->buffer[4]);
- }
- RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
-
- RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
-
- pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
- rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
- RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten
- * an rbuf for it */
-
- /*
- * Since this is simple mirroring, the first submission for a stripe is also
- * treated as the last.
- */
-
- t = NULL;
- if (keep_it) {
- if (rf_reconbufferDebug) {
- printf("raid%d: RAID1 rbuf submission: keeping rbuf\n",
- raidPtr->raidid);
- }
- t = rbuf;
- } else {
- if (use_committed) {
- if (rf_reconbufferDebug) {
- printf("raid%d: RAID1 rbuf submission: using committed rbuf\n", raidPtr->raidid);
- }
- t = reconCtrlPtr->committedRbufs;
- RF_ASSERT(t);
- reconCtrlPtr->committedRbufs = t->next;
- t->next = NULL;
- } else
- if (reconCtrlPtr->floatingRbufs) {
- if (rf_reconbufferDebug) {
- printf("raid%d: RAID1 rbuf submission: using floating rbuf\n", raidPtr->raidid);
- }
- t = reconCtrlPtr->floatingRbufs;
- reconCtrlPtr->floatingRbufs = t->next;
- t->next = NULL;
- }
- }
- if (t == NULL) {
- if (rf_reconbufferDebug) {
- printf("raid%d: RAID1 rbuf submission: waiting for rbuf\n", raidPtr->raidid);
- }
- RF_ASSERT((keep_it == 0) && (use_committed == 0));
- raidPtr->procsInBufWait++;
- if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1))
- && (raidPtr->numFullReconBuffers == 0)) {
- /* ruh-ro */
- RF_ERRORMSG("Buffer wait deadlock\n");
- rf_PrintPSStatusTable(raidPtr, rbuf->row);
- RF_PANIC();
- }
- pssPtr->flags |= RF_PSS_BUFFERWAIT;
- cb = rf_AllocCallbackDesc();
- cb->row = rbuf->row;
- cb->col = rbuf->col;
- cb->callbackArg.v = rbuf->parityStripeID;
- cb->callbackArg2.v = rbuf->which_ru;
- cb->next = NULL;
- if (reconCtrlPtr->bufferWaitList == NULL) {
- /* we are the wait list- lucky us */
- reconCtrlPtr->bufferWaitList = cb;
- } else {
- /* append to wait list */
- for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
- p->next = cb;
- }
- retcode = 1;
- goto out;
- }
- if (t != rbuf) {
- t->row = rbuf->row;
- t->col = reconCtrlPtr->fcol;
- t->parityStripeID = rbuf->parityStripeID;
- t->which_ru = rbuf->which_ru;
- t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
- t->spRow = rbuf->spRow;
- t->spCol = rbuf->spCol;
- t->spOffset = rbuf->spOffset;
- /* Swap buffers. DANCE! */
- ta = t->buffer;
- t->buffer = rbuf->buffer;
- rbuf->buffer = ta;
- }
- /*
- * Use the rbuf we've been given as the target.
- */
- RF_ASSERT(pssPtr->rbuf == NULL);
- pssPtr->rbuf = t;
-
- t->count = 1;
- /*
- * Below, we use 1 for numDataCol (which is equal to the count in the
- * previous line), so we'll always be done.
- */
- rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
-
-out:
- RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
- RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
- if (rf_reconbufferDebug) {
- printf("raid%d: RAID1 rbuf submission: returning %d\n",
- raidPtr->raidid, retcode);
- }
- return (retcode);
-}
diff --git a/sys/dev/raidframe/rf_raid1.h b/sys/dev/raidframe/rf_raid1.h
deleted file mode 100644
index 484cbcf..0000000
--- a/sys/dev/raidframe/rf_raid1.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_raid1.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: William V. Courtright II
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* header file for RAID Level 1 */
-
-#ifndef _RF__RF_RAID1_H_
-#define _RF__RF_RAID1_H_
-
-#include <dev/raidframe/rf_types.h>
-
-int
-rf_ConfigureRAID1(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-void
-rf_MapSectorRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-void
-rf_RAID1DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc);
-int
-rf_VerifyParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr,
- RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
-int
-rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf, int keep_int,
- int use_committed);
-
-#endif /* !_RF__RF_RAID1_H_ */
diff --git a/sys/dev/raidframe/rf_raid4.c b/sys/dev/raidframe/rf_raid4.c
deleted file mode 100644
index d080319..0000000
--- a/sys/dev/raidframe/rf_raid4.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/* $NetBSD: rf_raid4.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Rachad Youssef
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/***************************************
- *
- * rf_raid4.c -- implements RAID Level 4
- *
- ***************************************/
-
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_raid4.h>
-#include <dev/raidframe/rf_general.h>
-
-typedef struct RF_Raid4ConfigInfo_s {
- RF_RowCol_t *stripeIdentifier; /* filled in at config time & used by
- * IdentifyStripe */
-} RF_Raid4ConfigInfo_t;
-
-
-
-int
-rf_ConfigureRAID4(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_Raid4ConfigInfo_t *info;
- int i;
-
- /* create a RAID level 4 configuration structure ... */
- RF_MallocAndAdd(info, sizeof(RF_Raid4ConfigInfo_t), (RF_Raid4ConfigInfo_t *), raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
-
- /* ... and fill it in. */
- RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList);
- if (info->stripeIdentifier == NULL)
- return (ENOMEM);
- for (i = 0; i < raidPtr->numCol; i++)
- info->stripeIdentifier[i] = i;
-
- RF_ASSERT(raidPtr->numRow == 1);
-
- /* fill in the remaining layout parameters */
- layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = raidPtr->numCol - 1;
- layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numParityCol = 1;
- raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
-
- return (0);
-}
-
-int
-rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr)
-{
- return (20);
-}
-
-RF_HeadSepLimit_t
-rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr)
-{
- return (20);
-}
-
-void
-rf_MapSectorRAID4(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- *row = 0;
- *col = SUID % raidPtr->Layout.numDataCol;
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-void
-rf_MapParityRAID4(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
-
- *row = 0;
- *col = raidPtr->Layout.numDataCol;
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-void
-rf_IdentifyStripeRAID4(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_Raid4ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
-
- *outRow = 0;
- *diskids = info->stripeIdentifier;
-}
-
-void
-rf_MapSIDToPSIDRAID4(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
diff --git a/sys/dev/raidframe/rf_raid4.h b/sys/dev/raidframe/rf_raid4.h
deleted file mode 100644
index 56df05a..0000000
--- a/sys/dev/raidframe/rf_raid4.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_raid4.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Rachad Youssef
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_raid4.h header file for RAID Level 4 */
-
-#ifndef _RF__RF_RAID4_H_
-#define _RF__RF_RAID4_H_
-
-int
-rf_ConfigureRAID4(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr);
-RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr);
-void
-rf_MapSectorRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDRAID4(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-void
-rf_RAID4DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc);
-
-#endif /* !_RF__RF_RAID4_H_ */
diff --git a/sys/dev/raidframe/rf_raid5.c b/sys/dev/raidframe/rf_raid5.c
deleted file mode 100644
index 794e5a3..0000000
--- a/sys/dev/raidframe/rf_raid5.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/* $NetBSD: rf_raid5.c,v 1.4 2000/01/08 22:57:30 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/******************************************************************************
- *
- * rf_raid5.c -- implements RAID Level 5
- *
- *****************************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_raid5.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagffrd.h>
-#include <dev/raidframe/rf_dagffwr.h>
-#include <dev/raidframe/rf_dagdegrd.h>
-#include <dev/raidframe/rf_dagdegwr.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_utils.h>
-
-typedef struct RF_Raid5ConfigInfo_s {
- RF_RowCol_t **stripeIdentifier; /* filled in at config time and used
- * by IdentifyStripe */
-} RF_Raid5ConfigInfo_t;
-
-int
-rf_ConfigureRAID5(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_Raid5ConfigInfo_t *info;
- RF_RowCol_t i, j, startdisk;
-
- /* create a RAID level 5 configuration structure */
- RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
-
- RF_ASSERT(raidPtr->numRow == 1);
-
- /* the stripe identifier must identify the disks in each stripe, IN
- * THE ORDER THAT THEY APPEAR IN THE STRIPE. */
- info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
- if (info->stripeIdentifier == NULL)
- return (ENOMEM);
- startdisk = 0;
- for (i = 0; i < raidPtr->numCol; i++) {
- for (j = 0; j < raidPtr->numCol; j++) {
- info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
- }
- if ((--startdisk) < 0)
- startdisk = raidPtr->numCol - 1;
- }
-
- /* fill in the remaining layout parameters */
- layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = raidPtr->numCol - 1;
- layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numParityCol = 1;
- layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
-
- raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
-
- return (0);
-}
-
-int
-rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr)
-{
- return (20);
-}
-
-RF_HeadSepLimit_t
-rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr)
-{
- return (10);
-}
-#if !defined(__NetBSD__) && !defined(__FreeBSD__) && !defined(_KERNEL)
-/* not currently used */
-int
-rf_ShutdownRAID5(RF_Raid_t * raidPtr)
-{
- return (0);
-}
-#endif
-
-void
-rf_MapSectorRAID5(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
- *row = 0;
- *col = (SUID % raidPtr->numCol);
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-void
-rf_MapParityRAID5(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
-
- *row = 0;
- *col = raidPtr->Layout.numDataCol - (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol;
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-void
-rf_IdentifyStripeRAID5(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
- RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
-
- *outRow = 0;
- *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
-}
-
-void
-rf_MapSIDToPSIDRAID5(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
-/* select an algorithm for performing an access. Returns two pointers,
- * one to a function that will return information about the DAG, and
- * another to a function that will create the dag.
- */
-void
-rf_RaidFiveDagSelect(
- RF_Raid_t * raidPtr,
- RF_IoType_t type,
- RF_AccessStripeMap_t * asmap,
- RF_VoidFuncPtr * createFunc)
-{
- RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
- RF_PhysDiskAddr_t *failedPDA = NULL;
- RF_RowCol_t frow, fcol;
- RF_RowStatus_t rstat;
- int prior_recon;
-
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
-
- if (asmap->numDataFailed + asmap->numParityFailed > 1) {
- RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
- /* *infoFunc = */ *createFunc = NULL;
- return;
- } else
- if (asmap->numDataFailed + asmap->numParityFailed == 1) {
-
- /* if under recon & already reconstructed, redirect
- * the access to the spare drive and eliminate the
- * failure indication */
- failedPDA = asmap->failedPDAs[0];
- frow = failedPDA->row;
- fcol = failedPDA->col;
- rstat = raidPtr->status[failedPDA->row];
- prior_recon = (rstat == rf_rs_reconfigured) || (
- (rstat == rf_rs_reconstructing) ?
- rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
- );
- if (prior_recon) {
- RF_RowCol_t or = failedPDA->row, oc = failedPDA->col;
- RF_SectorNum_t oo = failedPDA->startSector;
-
- if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist
- * spare space */
-
- if (failedPDA == asmap->parityInfo) {
-
- /* parity has failed */
- (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row,
- &failedPDA->col, &failedPDA->startSector, RF_REMAP);
-
- if (asmap->parityInfo->next) { /* redir 2nd component,
- * if any */
- RF_PhysDiskAddr_t *p = asmap->parityInfo->next;
- RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit;
- p->row = failedPDA->row;
- p->col = failedPDA->col;
- p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) +
- SUoffs; /* cheating:
- * startSector is not
- * really a RAID address */
- }
- } else
- if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) {
- RF_ASSERT(0); /* should not ever
- * happen */
- } else {
-
- /* data has failed */
- (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row,
- &failedPDA->col, &failedPDA->startSector, RF_REMAP);
-
- }
-
- } else { /* redirect to dedicated spare
- * space */
-
- failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
- failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
-
- /* the parity may have two distinct
- * components, both of which may need
- * to be redirected */
- if (asmap->parityInfo->next) {
- if (failedPDA == asmap->parityInfo) {
- failedPDA->next->row = failedPDA->row;
- failedPDA->next->col = failedPDA->col;
- } else
- if (failedPDA == asmap->parityInfo->next) { /* paranoid: should
- * never occur */
- asmap->parityInfo->row = failedPDA->row;
- asmap->parityInfo->col = failedPDA->col;
- }
- }
- }
-
- RF_ASSERT(failedPDA->col != -1);
-
- if (rf_dagDebug || rf_mapDebug) {
- printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
- raidPtr->raidid, type, or, oc,
- (long) oo, failedPDA->row,
- failedPDA->col,
- (long) failedPDA->startSector);
- }
- asmap->numDataFailed = asmap->numParityFailed = 0;
- }
- }
- /* all dags begin/end with block/unblock node therefore, hdrSucc &
- * termAnt counts should always be 1 also, these counts should not be
- * visible outside dag creation routines - manipulating the counts
- * here should be removed */
- if (type == RF_IO_TYPE_READ) {
- if (asmap->numDataFailed == 0)
- *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;
- else
- *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG;
- } else {
-
-
- /* if mirroring, always use large writes. If the access
- * requires two distinct parity updates, always do a small
- * write. If the stripe contains a failure but the access
- * does not, do a small write. The first conditional
- * (numStripeUnitsAccessed <= numDataCol/2) uses a
- * less-than-or-equal rather than just a less-than because
- * when G is 3 or 4, numDataCol/2 is 1, and I want
- * single-stripe-unit updates to use just one disk. */
- if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
- if (rf_suppressLocksAndLargeWrites ||
- (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
- (asmap->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
- *createFunc = (RF_VoidFuncPtr) rf_CreateSmallWriteDAG;
- } else
- *createFunc = (RF_VoidFuncPtr) rf_CreateLargeWriteDAG;
- } else {
- if (asmap->numParityFailed == 1)
- *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG;
- else
- if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)
- *createFunc = NULL;
- else
- *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG;
- }
- }
-}
diff --git a/sys/dev/raidframe/rf_raid5.h b/sys/dev/raidframe/rf_raid5.h
deleted file mode 100644
index 17549fe..0000000
--- a/sys/dev/raidframe/rf_raid5.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_raid5.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_raid5.h - header file for RAID Level 5 */
-
-#ifndef _RF__RF_RAID5_H_
-#define _RF__RF_RAID5_H_
-
-int
-rf_ConfigureRAID5(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr);
-RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr);
-void
-rf_MapSectorRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDRAID5(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-void
-rf_RaidFiveDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type,
- RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc);
-
-#endif /* !_RF__RF_RAID5_H_ */
diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.c b/sys/dev/raidframe/rf_raid5_rotatedspare.c
deleted file mode 100644
index f167a5f..0000000
--- a/sys/dev/raidframe/rf_raid5_rotatedspare.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/* $NetBSD: rf_raid5_rotatedspare.c,v 1.5 2001/01/26 05:16:58 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Khalil Amiri
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/**************************************************************************
- *
- * rf_raid5_rotated_spare.c -- implements RAID Level 5 with rotated sparing
- *
- **************************************************************************/
-
-#include <dev/raidframe/rf_archs.h>
-
-#if RF_INCLUDE_RAID5_RS > 0
-
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_raid5.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_dagfuncs.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_raid5_rotatedspare.h>
-
-typedef struct RF_Raid5RSConfigInfo_s {
- RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by
- * IdentifyStripe */
-} RF_Raid5RSConfigInfo_t;
-
-int
-rf_ConfigureRAID5_RS(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_Raid5RSConfigInfo_t *info;
- RF_RowCol_t i, j, startdisk;
-
- /* create a RAID level 5 configuration structure */
- RF_MallocAndAdd(info, sizeof(RF_Raid5RSConfigInfo_t), (RF_Raid5RSConfigInfo_t *), raidPtr->cleanupList);
- if (info == NULL)
- return (ENOMEM);
- layoutPtr->layoutSpecificInfo = (void *) info;
-
- RF_ASSERT(raidPtr->numRow == 1);
- RF_ASSERT(raidPtr->numCol >= 3);
-
- /* the stripe identifier must identify the disks in each stripe, IN
- * THE ORDER THAT THEY APPEAR IN THE STRIPE. */
- info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
- if (info->stripeIdentifier == NULL)
- return (ENOMEM);
- startdisk = 0;
- for (i = 0; i < raidPtr->numCol; i++) {
- for (j = 0; j < raidPtr->numCol; j++) {
- info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
- }
- if ((--startdisk) < 0)
- startdisk = raidPtr->numCol - 1;
- }
-
- /* fill in the remaining layout parameters */
- layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
- layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
- layoutPtr->numDataCol = raidPtr->numCol - 2;
- layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
- layoutPtr->numParityCol = 1;
- layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
- raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
-
- raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
-
- return (0);
-}
-
-RF_ReconUnitCount_t
-rf_GetNumSpareRUsRAID5_RS(raidPtr)
- RF_Raid_t *raidPtr;
-{
- return (raidPtr->Layout.stripeUnitsPerDisk / raidPtr->numCol);
-}
-
-void
-rf_MapSectorRAID5_RS(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
-
- *row = 0;
- if (remap) {
- *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol;
- *col = (*col + 1) % raidPtr->numCol; /* spare unit is rotated
- * with parity; line
- * above maps to parity */
- } else {
- *col = (SUID + (SUID / raidPtr->Layout.numDataCol)) % raidPtr->numCol;
- }
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
-}
-
-void
-rf_MapParityRAID5_RS(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t raidSector,
- RF_RowCol_t * row,
- RF_RowCol_t * col,
- RF_SectorNum_t * diskSector,
- int remap)
-{
- RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
-
- *row = 0;
- *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol;
- *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
- (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
- if (remap)
- *col = (*col + 1) % raidPtr->numCol;
-}
-
-void
-rf_IdentifyStripeRAID5_RS(
- RF_Raid_t * raidPtr,
- RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids,
- RF_RowCol_t * outRow)
-{
- RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
- RF_Raid5RSConfigInfo_t *info = (RF_Raid5RSConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
- *outRow = 0;
- *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
-
-}
-
-void
-rf_MapSIDToPSIDRAID5_RS(
- RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID,
- RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru)
-{
- *which_ru = 0;
- *psID = stripeID;
-}
-#endif /* RF_INCLUDE_RAID5_RS > 0 */
diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.h b/sys/dev/raidframe/rf_raid5_rotatedspare.h
deleted file mode 100644
index 779150f..0000000
--- a/sys/dev/raidframe/rf_raid5_rotatedspare.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_raid5_rotatedspare.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Khalil Amiri
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/* rf_raid5_rotatedspare.h - header file for RAID Level 5 with rotated sparing */
-
-#ifndef _RF__RF_RAID5_ROTATEDSPARE_H_
-#define _RF__RF_RAID5_ROTATEDSPARE_H_
-
-int
-rf_ConfigureRAID5_RS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(RF_Raid_t * raidPtr);
-void
-rf_MapSectorRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_MapParityRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector,
- RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap);
-void
-rf_IdentifyStripeRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t addr,
- RF_RowCol_t ** diskids, RF_RowCol_t * outRow);
-void
-rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t * layoutPtr,
- RF_StripeNum_t stripeID, RF_StripeNum_t * psID,
- RF_ReconUnitNum_t * which_ru);
-
-#endif /* !_RF__RF_RAID5_ROTATEDSPARE_H_ */
diff --git a/sys/dev/raidframe/rf_raidframe.h b/sys/dev/raidframe/rf_raidframe.h
deleted file mode 100644
index fd711bd..0000000
--- a/sys/dev/raidframe/rf_raidframe.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_raidframe.h,v 1.11 2000/05/28 00:48:31 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************
- *
- * rf_raidframe.h
- *
- * main header file for using raidframe in the kernel.
- *
- *****************************************************/
-
-
-#ifndef _RF__RF_RAIDFRAME_H_
-#define _RF__RF_RAIDFRAME_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_configure.h>
-#include <dev/raidframe/rf_disks.h>
-#include <dev/raidframe/rf_raid.h>
-
-typedef RF_uint32 RF_ReconReqFlags_t;
-
-struct rf_recon_req { /* used to tell the kernel to fail a disk */
- RF_RowCol_t row, col;
- RF_ReconReqFlags_t flags;
- void *raidPtr; /* used internally; need not be set at ioctl
- * time */
- struct rf_recon_req *next; /* used internally; need not be set at
- * ioctl time */
-};
-
-struct RF_SparetWait_s {
- int C, G, fcol; /* C = # disks in row, G = # units in stripe,
- * fcol = which disk has failed */
-
- RF_StripeCount_t SUsPerPU; /* this stuff is the info required to
- * create a spare table */
- int TablesPerSpareRegion;
- int BlocksPerTable;
- RF_StripeCount_t TableDepthInPUs;
- RF_StripeCount_t SpareSpaceDepthPerRegionInSUs;
-
- RF_SparetWait_t *next; /* used internally; need not be set at ioctl
- * time */
-};
-
-typedef struct RF_DeviceConfig_s {
- u_int rows;
- u_int cols;
- u_int maxqdepth;
- int ndevs;
- RF_RaidDisk_t devs[RF_MAX_DISKS];
- int nspares;
- RF_RaidDisk_t spares[RF_MAX_DISKS];
-} RF_DeviceConfig_t;
-
-typedef struct RF_ProgressInfo_s {
- RF_uint64 remaining;
- RF_uint64 completed;
- RF_uint64 total;
-} RF_ProgressInfo_t;
-
-/* flags that can be put in the rf_recon_req structure */
-#define RF_FDFLAGS_NONE 0x0 /* just fail the disk */
-#define RF_FDFLAGS_RECON 0x1 /* fail and initiate recon */
-
-#define RAIDFRAME_CONFIGURE _IOW ('r', 1, void *) /* config an array */
-#if defined(__NetBSD__)
-#define RAIDFRAME_SHUTDOWN _IO ('r', 2) /* shutdown the array */
-#elif defined(__FreeBSD__)
-#define RAIDFRAME_SHUTDOWN _IOW ('r', 2, int) /* shutdown the array */
-#endif
-#define RAIDFRAME_TUR _IOW ('r', 3, dev_t) /* debug only: test
- * ready */
-#define RAIDFRAME_TEST_ACC _IOWR('r', 4, struct rf_test_acc)
- /* run a test access */
-#define RAIDFRAME_FAIL_DISK _IOW ('r', 5, struct rf_recon_req)
- /* fail a disk &
- * optionally start
- * recon */
-#define RAIDFRAME_CHECK_RECON_STATUS _IOR('r', 6, int) /* get reconstruction %
- * complete on indicated
- * row */
-#define RAIDFRAME_REWRITEPARITY _IO ('r', 7) /* rewrite (initialize)
- * all parity */
-#define RAIDFRAME_COPYBACK _IO ('r', 8) /* copy reconstructed
- * data back to replaced
- * disk */
-#define RAIDFRAME_SPARET_WAIT _IOR ('r', 9, RF_SparetWait_t)
- /* does not return until
- * kernel needs a spare
- * table */
-#define RAIDFRAME_SEND_SPARET _IOW ('r', 10, void *) /* used to send a spare
- * table down into the
- * kernel */
-#define RAIDFRAME_ABORT_SPARET_WAIT _IO ('r', 11) /* used to wake up the
- * sparemap daemon &
- * tell it to exit */
-#define RAIDFRAME_START_ATRACE _IO ('r', 12) /* start tracing
- * accesses */
-#define RAIDFRAME_STOP_ATRACE _IO ('r', 13) /* stop tracing
- * accesses */
-#define RAIDFRAME_GET_SIZE _IOR ('r', 14, int) /* get size (# sectors)
- * in raid device */
-#define RAIDFRAME_GET_INFO _IOWR ('r', 15, RF_DeviceConfig_t *)
- /* get configuration */
-#define RAIDFRAME_RESET_ACCTOTALS _IO ('r', 16) /* reset AccTotals for
- * device */
-#define RAIDFRAME_GET_ACCTOTALS _IOR ('r', 17, RF_AccTotals_t)
- /* retrieve AccTotals
- * for device */
-#define RAIDFRAME_KEEP_ACCTOTALS _IOW ('r', 18, int) /* turn AccTotals on or
- * off for device */
-#define RAIDFRAME_GET_COMPONENT_LABEL _IOWR ('r', 19, RF_ComponentLabel_t)
-#define RAIDFRAME_SET_COMPONENT_LABEL _IOW ('r', 20, RF_ComponentLabel_t)
-
-#define RAIDFRAME_INIT_LABELS _IOW ('r', 21, RF_ComponentLabel_t)
-#define RAIDFRAME_ADD_HOT_SPARE _IOW ('r', 22, RF_SingleComponent_t)
-#define RAIDFRAME_REMOVE_HOT_SPARE _IOW ('r', 23, RF_SingleComponent_t)
-#define RAIDFRAME_REBUILD_IN_PLACE _IOW ('r', 24, RF_SingleComponent_t)
-#define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int)
-#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOR ('r', 26, int)
-#define RAIDFRAME_CHECK_COPYBACK_STATUS _IOR ('r', 27, int)
-#define RAIDFRAME_SET_AUTOCONFIG _IOWR ('r', 28, int)
-#define RAIDFRAME_SET_ROOT _IOWR ('r', 29, int)
-#define RAIDFRAME_DELETE_COMPONENT _IOW ('r', 30, RF_SingleComponent_t)
-#define RAIDFRAME_INCORPORATE_HOT_SPARE _IOW ('r', 31, RF_SingleComponent_t)
-
-/* 'Extended' status versions */
-#define RAIDFRAME_CHECK_RECON_STATUS_EXT _IOR('r', 32, RF_ProgressInfo_t)
-#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT _IOR ('r', 33, \
- RF_ProgressInfo_t)
-#define RAIDFRAME_CHECK_COPYBACK_STATUS_EXT _IOR ('r', 34, RF_ProgressInfo_t)
-#define RAIDFRAME_GET_UNIT _IOWR ('r', 35, int)
-
-#endif /* !_RF__RF_RAIDFRAME_H_ */
diff --git a/sys/dev/raidframe/rf_reconbuffer.c b/sys/dev/raidframe/rf_reconbuffer.c
deleted file mode 100644
index 5831d5a..0000000
--- a/sys/dev/raidframe/rf_reconbuffer.c
+++ /dev/null
@@ -1,468 +0,0 @@
-/* $NetBSD: rf_reconbuffer.c,v 1.5 2001/01/27 20:10:49 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/***************************************************
- *
- * rf_reconbuffer.c -- reconstruction buffer manager
- *
- ***************************************************/
-
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_reconbuffer.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_revent.h>
-#include <dev/raidframe/rf_reconutil.h>
-#include <dev/raidframe/rf_nwayxor.h>
-
-#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a)
-#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b)
-#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c)
-#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d)
-#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e)
-
-/*****************************************************************************
- *
- * Submit a reconstruction buffer to the manager for XOR. We can only
- * submit a buffer if (1) we can xor into an existing buffer, which
- * means we don't have to acquire a new one, (2) we can acquire a
- * floating recon buffer, or (3) the caller has indicated that we are
- * allowed to keep the submitted buffer.
- *
- * Returns non-zero if and only if we were not able to submit.
- * In this case, we append the current disk ID to the wait list on the
- * indicated RU, so that it will be re-enabled when we acquire a buffer
- * for this RU.
- *
- ****************************************************************************/
-
-/*
- * nWayXorFuncs[i] is a pointer to a function that will xor "i"
- * bufs into the accumulating sum.
- */
-static RF_VoidFuncPtr nWayXorFuncs[] = {
- NULL,
- (RF_VoidFuncPtr) rf_nWayXor1,
- (RF_VoidFuncPtr) rf_nWayXor2,
- (RF_VoidFuncPtr) rf_nWayXor3,
- (RF_VoidFuncPtr) rf_nWayXor4,
- (RF_VoidFuncPtr) rf_nWayXor5,
- (RF_VoidFuncPtr) rf_nWayXor6,
- (RF_VoidFuncPtr) rf_nWayXor7,
- (RF_VoidFuncPtr) rf_nWayXor8,
- (RF_VoidFuncPtr) rf_nWayXor9
-};
-
-int
-rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
- RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
- int keep_it; /* whether we can keep this buffer or we have
- * to return it */
- int use_committed; /* whether to use a committed or an available
- * recon buffer */
-{
- RF_LayoutSW_t *lp;
- int rc;
-
- lp = rbuf->raidPtr->Layout.map;
- rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
- return (rc);
-}
-
-int
-rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
- RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
- int keep_it; /* whether we can keep this buffer or we have
- * to return it */
- int use_committed; /* whether to use a committed or an available
- * recon buffer */
-{
- RF_Raid_t *raidPtr = rbuf->raidPtr;
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
- RF_ReconParityStripeStatus_t *pssPtr;
- RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf
- * pointers */
- caddr_t ta; /* temporary data buffer pointer */
- RF_CallbackDesc_t *cb, *p;
- int retcode = 0, created = 0;
-
- RF_Etimer_t timer;
-
- /* makes no sense to have a submission from the failed disk */
- RF_ASSERT(rbuf);
- RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
-
- Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
- rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
-
- RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
-
- RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
-
- pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
- RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten
- * an rbuf for it */
-
- /* check to see if enough buffers have accumulated to do an XOR. If
- * so, there's no need to acquire a floating rbuf. Before we can do
- * any XORing, we must have acquired a destination buffer. If we
- * have, then we can go ahead and do the XOR if (1) including this
- * buffer, enough bufs have accumulated, or (2) this is the last
- * submission for this stripe. Otherwise, we have to go acquire a
- * floating rbuf. */
-
- targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
- if ((targetRbuf != NULL) &&
- ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
- pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */
- Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
- RF_ETIMER_START(timer);
- rf_MultiWayReconXor(raidPtr, pssPtr);
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
- if (!keep_it) {
- raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
- RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
- RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
- raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
- RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
- RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
-
- rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
- }
- rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
-
- /* if use_committed is on, we _must_ consume a buffer off the
- * committed list. */
- if (use_committed) {
- t = reconCtrlPtr->committedRbufs;
- RF_ASSERT(t);
- reconCtrlPtr->committedRbufs = t->next;
- rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
- }
- if (keep_it) {
- RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
- RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
- rf_FreeReconBuffer(rbuf);
- return (retcode);
- }
- goto out;
- }
- /* set the value of "t", which we'll use as the rbuf from here on */
- if (keep_it) {
- t = rbuf;
- } else {
- if (use_committed) { /* if a buffer has been committed to
- * us, use it */
- t = reconCtrlPtr->committedRbufs;
- RF_ASSERT(t);
- reconCtrlPtr->committedRbufs = t->next;
- t->next = NULL;
- } else
- if (reconCtrlPtr->floatingRbufs) {
- t = reconCtrlPtr->floatingRbufs;
- reconCtrlPtr->floatingRbufs = t->next;
- t->next = NULL;
- }
- }
-
- /* If we weren't able to acquire a buffer, append to the end of the
- * buf list in the recon ctrl struct. */
- if (!t) {
- RF_ASSERT(!keep_it && !use_committed);
- Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);
-
- raidPtr->procsInBufWait++;
- if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
- printf("Buffer wait deadlock detected. Exiting.\n");
- rf_PrintPSStatusTable(raidPtr, rbuf->row);
- RF_PANIC();
- }
- pssPtr->flags |= RF_PSS_BUFFERWAIT;
- cb = rf_AllocCallbackDesc(); /* append to buf wait list in
- * recon ctrl structure */
- cb->row = rbuf->row;
- cb->col = rbuf->col;
- cb->callbackArg.v = rbuf->parityStripeID;
- cb->callbackArg2.v = rbuf->which_ru;
- cb->next = NULL;
- if (!reconCtrlPtr->bufferWaitList)
- reconCtrlPtr->bufferWaitList = cb;
- else { /* might want to maintain head/tail pointers
- * here rather than search for end of list */
- for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
- p->next = cb;
- }
- retcode = 1;
- goto out;
- }
- Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
- RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
- RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
- raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
- RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
- RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
-
- rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
-
- /* initialize the buffer */
- if (t != rbuf) {
- t->row = rbuf->row;
- t->col = reconCtrlPtr->fcol;
- t->parityStripeID = rbuf->parityStripeID;
- t->which_ru = rbuf->which_ru;
- t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
- t->spRow = rbuf->spRow;
- t->spCol = rbuf->spCol;
- t->spOffset = rbuf->spOffset;
-
- ta = t->buffer;
- t->buffer = rbuf->buffer;
- rbuf->buffer = ta; /* swap buffers */
- }
- /* the first installation always gets installed as the destination
- * buffer. subsequent installations get stacked up to allow for
- * multi-way XOR */
- if (!pssPtr->rbuf) {
- pssPtr->rbuf = t;
- t->count = 1;
- } else
- pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */
-
- rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if
- * G=2 */
-
-out:
- RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
- RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
- return (retcode);
-}
-
-int
-rf_MultiWayReconXor(raidPtr, pssPtr)
- RF_Raid_t *raidPtr;
- RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this
- * parity stripe */
-{
- int i, numBufs = pssPtr->xorBufCount;
- int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
- RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
- RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
-
- RF_ASSERT(pssPtr->rbuf != NULL);
- RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
-#ifdef _KERNEL
-#ifndef __NetBSD__
-#ifndef __FreeBSD__
- thread_block(); /* yield the processor before doing a big XOR */
-#endif
-#endif
-#endif /* _KERNEL */
- /*
- * XXX
- *
- * What if more than 9 bufs?
- */
- nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
-
- /* release all the reconstruction buffers except the last one, which
- * belongs to the disk whose submission caused this XOR to take place */
- for (i = 0; i < numBufs - 1; i++) {
- if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
- rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
- else
- if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
- rf_FreeReconBuffer(rbufs[i]);
- else
- RF_ASSERT(0);
- }
- targetRbuf->count += pssPtr->xorBufCount;
- pssPtr->xorBufCount = 0;
- return (0);
-}
-/* removes one full buffer from one of the full-buffer lists and returns it.
- *
- * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
- */
-RF_ReconBuffer_t *
-rf_GetFullReconBuffer(reconCtrlPtr)
- RF_ReconCtrl_t *reconCtrlPtr;
-{
- RF_ReconBuffer_t *p;
-
- RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
-
- if ((p = reconCtrlPtr->priorityList) != NULL) {
- reconCtrlPtr->priorityList = p->next;
- p->next = NULL;
- goto out;
- }
- if ((p = reconCtrlPtr->fullBufferList) != NULL) {
- reconCtrlPtr->fullBufferList = p->next;
- p->next = NULL;
- goto out;
- }
-out:
- RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
- return (p);
-}
-
-
-/* if the reconstruction buffer is full, move it to the full list,
- * which is maintained sorted by failed disk sector offset
- *
- * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */
-int
-rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
- RF_Raid_t *raidPtr;
- RF_ReconCtrl_t *reconCtrl;
- RF_ReconParityStripeStatus_t *pssPtr;
- int numDataCol;
-{
- RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
-
- if (rbuf->count == numDataCol) {
- raidPtr->numFullReconBuffers++;
- Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
- (long) rbuf->parityStripeID, rbuf->which_ru);
- if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
- Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
- (long) rbuf->parityStripeID, rbuf->which_ru);
- rbuf->next = reconCtrl->fullBufferList;
- reconCtrl->fullBufferList = rbuf;
- } else {
- for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
- rbuf->next = p;
- pt->next = rbuf;
- Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
- (long) rbuf->parityStripeID, rbuf->which_ru);
- }
-#if 0
- pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like
- * to be able to find
- * this rbuf while it's
- * awaiting write */
-#else
- rbuf->pssPtr = pssPtr;
-#endif
- pssPtr->rbuf = NULL;
- rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
- }
- return (0);
-}
-
-
-/* release a floating recon buffer for someone else to use.
- * assumes the rb_mutex is LOCKED at entry
- */
-void
-rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
- RF_ReconBuffer_t *rbuf;
-{
- RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
- RF_CallbackDesc_t *cb;
-
- Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
- (long) rbuf->parityStripeID, rbuf->which_ru);
-
- /* if anyone is waiting on buffers, wake one of them up. They will
- * subsequently wake up anyone else waiting on their RU */
- if (rcPtr->bufferWaitList) {
- rbuf->next = rcPtr->committedRbufs;
- rcPtr->committedRbufs = rbuf;
- cb = rcPtr->bufferWaitList;
- rcPtr->bufferWaitList = cb->next;
- rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've
- * committed a buffer */
- rf_FreeCallbackDesc(cb);
- raidPtr->procsInBufWait--;
- } else {
- rbuf->next = rcPtr->floatingRbufs;
- rcPtr->floatingRbufs = rbuf;
- }
-}
-/* release any disk that is waiting on a buffer for the indicated RU.
- * assumes the rb_mutex is LOCKED at entry
- */
-void
-rf_ReleaseBufferWaiters(raidPtr, pssPtr)
- RF_Raid_t *raidPtr;
- RF_ReconParityStripeStatus_t *pssPtr;
-{
- RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
-
- Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
- (long) pssPtr->parityStripeID, pssPtr->which_ru);
- pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
- while (cb) {
- cb1 = cb->next;
- cb->next = NULL;
- rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't
- * committed a buffer */
- rf_FreeCallbackDesc(cb);
- cb = cb1;
- }
- pssPtr->bufWaitList = NULL;
-}
-/* when reconstruction is forced on an RU, there may be some disks waiting to
- * acquire a buffer for that RU. Since we allocate a new buffer as part of
- * the forced-reconstruction process, we no longer have to wait for any
- * buffers, so we wakeup any waiter that we find in the bufferWaitList
- *
- * assumes the rb_mutex is LOCKED at entry
- */
-void
-rf_ReleaseBufferWaiter(rcPtr, rbuf)
- RF_ReconCtrl_t *rcPtr;
- RF_ReconBuffer_t *rbuf;
-{
- RF_CallbackDesc_t *cb, *cbt;
-
- for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) {
- if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) {
- Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
- if (cbt)
- cbt->next = cb->next;
- else
- rcPtr->bufferWaitList = cb->next;
- rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no
- * committed buffer */
- rf_FreeCallbackDesc(cb);
- return;
- }
- }
-}
diff --git a/sys/dev/raidframe/rf_reconbuffer.h b/sys/dev/raidframe/rf_reconbuffer.h
deleted file mode 100644
index 1a5407e..0000000
--- a/sys/dev/raidframe/rf_reconbuffer.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_reconbuffer.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*******************************************************************
- *
- * rf_reconbuffer.h -- header file for reconstruction buffer manager
- *
- *******************************************************************/
-
-#ifndef _RF__RF_RECONBUFFER_H_
-#define _RF__RF_RECONBUFFER_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_reconstruct.h>
-
-int
-rf_SubmitReconBuffer(RF_ReconBuffer_t * rbuf, int keep_int,
- int use_committed);
-int
-rf_SubmitReconBufferBasic(RF_ReconBuffer_t * rbuf, int keep_int,
- int use_committed);
-int
-rf_MultiWayReconXor(RF_Raid_t * raidPtr,
- RF_ReconParityStripeStatus_t * pssPtr);
-RF_ReconBuffer_t *rf_GetFullReconBuffer(RF_ReconCtrl_t * reconCtrlPtr);
-int
-rf_CheckForFullRbuf(RF_Raid_t * raidPtr, RF_ReconCtrl_t * reconCtrl,
- RF_ReconParityStripeStatus_t * pssPtr, int numDataCol);
-void
-rf_ReleaseFloatingReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_ReconBuffer_t * rbuf);
-void
-rf_ReleaseBufferWaiters(RF_Raid_t * raidPtr,
- RF_ReconParityStripeStatus_t * pssPtr);
-void rf_ReleaseBufferWaiter(RF_ReconCtrl_t * rcPtr, RF_ReconBuffer_t * rbuf);
-
-#endif /* !_RF__RF_RECONBUFFER_H_ */
diff --git a/sys/dev/raidframe/rf_reconmap.c b/sys/dev/raidframe/rf_reconmap.c
deleted file mode 100644
index 261d339..0000000
--- a/sys/dev/raidframe/rf_reconmap.c
+++ /dev/null
@@ -1,396 +0,0 @@
-/* $NetBSD: rf_reconmap.c,v 1.6 1999/08/14 21:44:24 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*************************************************************************
- * rf_reconmap.c
- *
- * code to maintain a map of what sectors have/have not been reconstructed
- *
- *************************************************************************/
-
-#include <dev/raidframe/rf_raid.h>
-#include <sys/time.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_utils.h>
-
-/* special pointer values indicating that a reconstruction unit
- * has been either totally reconstructed or not at all. Both
- * are illegal pointer values, so you have to be careful not to
- * dereference through them. RU_NOTHING must be zero, since
- * MakeReconMap uses bzero to initialize the structure. These are used
- * only at the head of the list.
- */
-#define RU_ALL ((RF_ReconMapListElem_t *) -1)
-#define RU_NOTHING ((RF_ReconMapListElem_t *) 0)
-
-/* used to mark the end of the list */
-#define RU_NIL ((RF_ReconMapListElem_t *) 0)
-
-
-static void
-compact_stat_entry(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr,
- int i);
-static void crunch_list(RF_ReconMap_t * mapPtr, RF_ReconMapListElem_t * listPtr);
-static RF_ReconMapListElem_t *
-MakeReconMapListElem(RF_SectorNum_t startSector,
- RF_SectorNum_t stopSector, RF_ReconMapListElem_t * next);
-static void
-FreeReconMapListElem(RF_ReconMap_t * mapPtr,
- RF_ReconMapListElem_t * p);
-static void update_size(RF_ReconMap_t * mapPtr, int size);
-static void PrintList(RF_ReconMapListElem_t * listPtr);
-
-/*-----------------------------------------------------------------------------
- *
- * Creates and initializes new Reconstruction map
- *
- *-----------------------------------------------------------------------------*/
-
-RF_ReconMap_t *
-rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk)
- RF_Raid_t *raidPtr;
- RF_SectorCount_t ru_sectors; /* size of reconstruction unit in
- * sectors */
- RF_SectorCount_t disk_sectors; /* size of disk in sectors */
- RF_ReconUnitCount_t spareUnitsPerDisk; /* zero unless distributed
- * sparing */
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_ReconUnitCount_t num_rus = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerRU;
- RF_ReconMap_t *p;
- int rc;
-
- RF_Malloc(p, sizeof(RF_ReconMap_t), (RF_ReconMap_t *));
- p->sectorsPerReconUnit = ru_sectors;
- p->sectorsInDisk = disk_sectors;
-
- p->totalRUs = num_rus;
- p->spareRUs = spareUnitsPerDisk;
- p->unitsLeft = num_rus - spareUnitsPerDisk;
-
- RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **));
- RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL);
-
- (void) bzero((char *) p->status, num_rus * sizeof(RF_ReconMapListElem_t *));
-
- p->size = sizeof(RF_ReconMap_t) + num_rus * sizeof(RF_ReconMapListElem_t *);
- p->maxSize = p->size;
-
- rc = rf_mutex_init(&p->mutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- RF_Free(p->status, num_rus * sizeof(RF_ReconMapListElem_t *));
- RF_Free(p, sizeof(RF_ReconMap_t));
- return (NULL);
- }
- return (p);
-}
-
-
-/*-----------------------------------------------------------------------------
- *
- * marks a new set of sectors as reconstructed. All the possible mergings get
- * complicated. To simplify matters, the approach I take is to just dump
- * something into the list, and then clean it up (i.e. merge elements and
- * eliminate redundant ones) in a second pass over the list (compact_stat_entry()).
- * Not 100% efficient, since a structure can be allocated and then immediately
- * freed, but it keeps this code from becoming (more of) a nightmare of
- * special cases. The only thing that compact_stat_entry() assumes is that the
- * list is sorted by startSector, and so this is the only condition I maintain
- * here. (MCH)
- *
- *-----------------------------------------------------------------------------*/
-
-void
-rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector)
- RF_Raid_t *raidPtr;
- RF_ReconMap_t *mapPtr;
- RF_SectorNum_t startSector;
- RF_SectorNum_t stopSector;
-{
- RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit;
- RF_SectorNum_t i, first_in_RU, last_in_RU;
- RF_ReconMapListElem_t *p, *pt;
-
- RF_LOCK_MUTEX(mapPtr->mutex);
- RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk && stopSector >= startSector);
-
- while (startSector <= stopSector) {
- i = startSector / mapPtr->sectorsPerReconUnit;
- first_in_RU = i * sectorsPerReconUnit;
- last_in_RU = first_in_RU + sectorsPerReconUnit - 1;
- p = mapPtr->status[i];
- if (p != RU_ALL) {
- if (p == RU_NOTHING || p->startSector > startSector) { /* insert at front of
- * list */
-
- mapPtr->status[i] = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), (p == RU_NOTHING) ? NULL : p);
- update_size(mapPtr, sizeof(RF_ReconMapListElem_t));
-
- } else {/* general case */
- do { /* search for place to insert */
- pt = p;
- p = p->next;
- } while (p && (p->startSector < startSector));
- pt->next = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), p);
- update_size(mapPtr, sizeof(RF_ReconMapListElem_t));
- }
- compact_stat_entry(raidPtr, mapPtr, i);
- }
- startSector = RF_MIN(stopSector, last_in_RU) + 1;
- }
- RF_UNLOCK_MUTEX(mapPtr->mutex);
-}
-
-
-
-/*-----------------------------------------------------------------------------
- *
- * performs whatever list compactions can be done, and frees any space
- * that is no longer necessary. Assumes only that the list is sorted
- * by startSector. crunch_list() compacts a single list as much as possible,
- * and the second block of code deletes the entire list if possible.
- * crunch_list() is also called from MakeReconMapAccessList().
- *
- * When a recon unit is detected to be fully reconstructed, we set the
- * corresponding bit in the parity stripe map so that the head follow
- * code will not select this parity stripe again. This is redundant (but
- * harmless) when compact_stat_entry is called from the reconstruction code,
- * but necessary when called from the user-write code.
- *
- *-----------------------------------------------------------------------------*/
-
-static void
-compact_stat_entry(raidPtr, mapPtr, i)
- RF_Raid_t *raidPtr;
- RF_ReconMap_t *mapPtr;
- int i;
-{
- RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit;
- RF_ReconMapListElem_t *p = mapPtr->status[i];
-
- crunch_list(mapPtr, p);
-
- if ((p->startSector == i * sectorsPerReconUnit) &&
- (p->stopSector == i * sectorsPerReconUnit + sectorsPerReconUnit - 1)) {
- mapPtr->status[i] = RU_ALL;
- mapPtr->unitsLeft--;
- FreeReconMapListElem(mapPtr, p);
- }
-}
-
-static void
-crunch_list(mapPtr, listPtr)
- RF_ReconMap_t *mapPtr;
- RF_ReconMapListElem_t *listPtr;
-{
- RF_ReconMapListElem_t *pt, *p = listPtr;
-
- if (!p)
- return;
- pt = p;
- p = p->next;
- while (p) {
- if (pt->stopSector >= p->startSector - 1) {
- pt->stopSector = RF_MAX(pt->stopSector, p->stopSector);
- pt->next = p->next;
- FreeReconMapListElem(mapPtr, p);
- p = pt->next;
- } else {
- pt = p;
- p = p->next;
- }
- }
-}
-/*-----------------------------------------------------------------------------
- *
- * Allocate and fill a new list element
- *
- *-----------------------------------------------------------------------------*/
-
-static RF_ReconMapListElem_t *
-MakeReconMapListElem(
- RF_SectorNum_t startSector,
- RF_SectorNum_t stopSector,
- RF_ReconMapListElem_t * next)
-{
- RF_ReconMapListElem_t *p;
-
- RF_Malloc(p, sizeof(RF_ReconMapListElem_t), (RF_ReconMapListElem_t *));
- if (p == NULL)
- return (NULL);
- p->startSector = startSector;
- p->stopSector = stopSector;
- p->next = next;
- return (p);
-}
-/*-----------------------------------------------------------------------------
- *
- * Free a list element
- *
- *-----------------------------------------------------------------------------*/
-
-static void
-FreeReconMapListElem(mapPtr, p)
- RF_ReconMap_t *mapPtr;
- RF_ReconMapListElem_t *p;
-{
- int delta;
-
- if (mapPtr) {
- delta = 0 - (int) sizeof(RF_ReconMapListElem_t);
- update_size(mapPtr, delta);
- }
- RF_Free(p, sizeof(*p));
-}
-/*-----------------------------------------------------------------------------
- *
- * Free an entire status structure. Inefficient, but can be called at any time.
- *
- *-----------------------------------------------------------------------------*/
-void
-rf_FreeReconMap(mapPtr)
- RF_ReconMap_t *mapPtr;
-{
- RF_ReconMapListElem_t *p, *q;
- RF_ReconUnitCount_t numRUs;
- RF_ReconUnitNum_t i;
-
- numRUs = mapPtr->sectorsInDisk / mapPtr->sectorsPerReconUnit;
- if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit)
- numRUs++;
-
- for (i = 0; i < numRUs; i++) {
- p = mapPtr->status[i];
- while (p != RU_NOTHING && p != RU_ALL) {
- q = p;
- p = p->next;
- RF_Free(q, sizeof(*q));
- }
- }
- rf_mutex_destroy(&mapPtr->mutex);
- RF_Free(mapPtr->status, mapPtr->totalRUs * sizeof(RF_ReconMapListElem_t *));
- RF_Free(mapPtr, sizeof(RF_ReconMap_t));
-}
-/*-----------------------------------------------------------------------------
- *
- * returns nonzero if the indicated RU has been reconstructed already
- *
- *---------------------------------------------------------------------------*/
-
-int
-rf_CheckRUReconstructed(mapPtr, startSector)
- RF_ReconMap_t *mapPtr;
- RF_SectorNum_t startSector;
-{
- RF_ReconMapListElem_t *l; /* used for searching */
- RF_ReconUnitNum_t i;
-
- i = startSector / mapPtr->sectorsPerReconUnit;
- l = mapPtr->status[i];
- return ((l == RU_ALL) ? 1 : 0);
-}
-
-RF_ReconUnitCount_t
-rf_UnitsLeftToReconstruct(mapPtr)
- RF_ReconMap_t *mapPtr;
-{
- RF_ASSERT(mapPtr != NULL);
- return (mapPtr->unitsLeft);
-}
-/* updates the size fields of a status descriptor */
-static void
-update_size(mapPtr, size)
- RF_ReconMap_t *mapPtr;
- int size;
-{
- mapPtr->size += size;
- mapPtr->maxSize = RF_MAX(mapPtr->size, mapPtr->maxSize);
-}
-
-static void
-PrintList(listPtr)
- RF_ReconMapListElem_t *listPtr;
-{
- while (listPtr) {
- printf("%d,%d -> ", (int) listPtr->startSector, (int) listPtr->stopSector);
- listPtr = listPtr->next;
- }
- printf("\n");
-}
-
-void
-rf_PrintReconMap(raidPtr, mapPtr, frow, fcol)
- RF_Raid_t *raidPtr;
- RF_ReconMap_t *mapPtr;
- RF_RowCol_t frow;
- RF_RowCol_t fcol;
-{
- RF_ReconUnitCount_t numRUs;
- RF_ReconMapListElem_t *p;
- RF_ReconUnitNum_t i;
-
- numRUs = mapPtr->totalRUs;
- if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit)
- numRUs++;
-
- for (i = 0; i < numRUs; i++) {
- p = mapPtr->status[i];
- if (p == RU_ALL)/* printf("[%d] ALL\n",i) */
- ;
- else
- if (p == RU_NOTHING) {
- printf("%d: Unreconstructed\n", i);
- } else {
- printf("%d: ", i);
- PrintList(p);
- }
- }
-}
-
-void
-rf_PrintReconSchedule(mapPtr, starttime)
- RF_ReconMap_t *mapPtr;
- struct timeval *starttime;
-{
- static int old_pctg = -1;
- struct timeval tv, diff;
- int new_pctg;
-
- new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);
- if (new_pctg != old_pctg) {
- RF_GETTIME(tv);
- RF_TIMEVAL_DIFF(starttime, &tv, &diff);
- printf("%d %d.%06d\n", (int) new_pctg, (int) diff.tv_sec, (int) diff.tv_usec);
- old_pctg = new_pctg;
- }
-}
diff --git a/sys/dev/raidframe/rf_reconmap.h b/sys/dev/raidframe/rf_reconmap.h
deleted file mode 100644
index 2fee059..0000000
--- a/sys/dev/raidframe/rf_reconmap.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_reconmap.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/******************************************************************************
- * rf_reconmap.h -- header file describing the reconstruction status data structure
- ******************************************************************************/
-
-#ifndef _RF__RF_RECONMAP_H_
-#define _RF__RF_RECONMAP_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-
-/*
- * Main reconstruction status descriptor. size and maxSize are used for
- * monitoring only: they have no function for reconstruction.
- *
- * status[] holds one entry per reconstruction unit. An entry is either one
- * of the sentinel values RU_ALL / RU_NOTHING or the head of a list of
- * sector ranges (RF_ReconMapListElem_t).
- */
-struct RF_ReconMap_s {
- RF_SectorCount_t sectorsPerReconUnit; /* sectors per reconstruct
- * unit */
- RF_SectorCount_t sectorsInDisk; /* total sectors in disk */
- /* NOTE(review): unitsLeft counts recon units and is returned by
-  * rf_UnitsLeftToReconstruct() as RF_ReconUnitCount_t, yet it is
-  * declared as a sector count -- confirm the two types agree. */
- RF_SectorCount_t unitsLeft; /* recon units left to recon */
- RF_ReconUnitCount_t totalRUs; /* total recon units on disk */
- RF_ReconUnitCount_t spareRUs; /* total number of spare RUs on failed
- * disk */
- RF_StripeCount_t totalParityStripes; /* total number of parity
- * stripes in array */
- u_int size; /* overall size of this structure */
- u_int maxSize; /* maximum size so far */
- RF_ReconMapListElem_t **status; /* array of ptrs to list elements */
- /* presumably serializes updates to this map -- TODO confirm */
- RF_DECLARE_MUTEX(mutex)
-};
-/*
- * A list element: one range of sectors, from startSector to stopSector,
- * linked to the next range of the same reconstruction unit.
- */
-struct RF_ReconMapListElem_s {
- RF_SectorNum_t startSector; /* bounding sect nums on this block */
- RF_SectorNum_t stopSector;
- RF_ReconMapListElem_t *next; /* next element in list */
-};
-
-/* Creates a reconstruction map (implementation in rf_reconmap.c). */
-RF_ReconMap_t *
-rf_MakeReconMap(RF_Raid_t * raidPtr, RF_SectorCount_t ru_sectors,
- RF_SectorCount_t disk_sectors, RF_ReconUnitCount_t spareUnitsPerDisk);
-
-/* Records the sector range startSector..stopSector in the map; presumably
- * both bounds are inclusive -- TODO confirm against the implementation. */
-void
-rf_ReconMapUpdate(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr,
- RF_SectorNum_t startSector, RF_SectorNum_t stopSector);
-
-/* Releases a map obtained from rf_MakeReconMap(). */
-void rf_FreeReconMap(RF_ReconMap_t * mapPtr);
-
-/* Returns 1 if the reconstruction unit containing startSector is marked
- * RU_ALL in the map, 0 otherwise. */
-int rf_CheckRUReconstructed(RF_ReconMap_t * mapPtr, RF_SectorNum_t startSector);
-
-/* Returns mapPtr->unitsLeft; asserts that mapPtr is not NULL. */
-RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(RF_ReconMap_t * mapPtr);
-
-/* Prints the status of every reconstruction unit that is not marked RU_ALL.
- * raidPtr, frow and fcol are not used by the implementation. */
-void
-rf_PrintReconMap(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr,
- RF_RowCol_t frow, RF_RowCol_t fcol);
-
-/* Prints the completion percentage and the time elapsed since *starttime
- * whenever the integer percentage changes. */
-void rf_PrintReconSchedule(RF_ReconMap_t * mapPtr, struct timeval * starttime);
-
-#endif /* !_RF__RF_RECONMAP_H_ */
diff --git a/sys/dev/raidframe/rf_reconstruct.c b/sys/dev/raidframe/rf_reconstruct.c
deleted file mode 100644
index e24d440..0000000
--- a/sys/dev/raidframe/rf_reconstruct.c
+++ /dev/null
@@ -1,1682 +0,0 @@
-/* $NetBSD: rf_reconstruct.c,v 1.27 2001/01/26 02:16:24 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/************************************************************
- *
- * rf_reconstruct.c -- code to perform on-line reconstruction
- *
- ************************************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <sys/time.h>
-#if defined(__FreeBSD__)
-#include <sys/systm.h>
-#if __FreeBSD_version > 500005
-#include <sys/bio.h>
-#endif
-#endif
-#include <sys/buf.h>
-#include <sys/errno.h>
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#if defined(__NetBSD__)
-#include <sys/ioctl.h>
-#elif defined(__FreeBSD__)
-#include <sys/ioccom.h>
-#endif
-#include <sys/fcntl.h>
-#include <sys/vnode.h>
-
-
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_reconutil.h>
-#include <dev/raidframe/rf_revent.h>
-#include <dev/raidframe/rf_reconbuffer.h>
-#include <dev/raidframe/rf_acctrace.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_driver.h>
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-#include <dev/raidframe/rf_kintf.h>
-
-/* setting these to -1 causes them to be set to their default values if not set by debug options */
-
-#define Dprintf(s) if (rf_reconDebug) rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf2(s,a,b) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf3(s,a,b,c) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
-#define Dprintf4(s,a,b,c,d) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL)
-#define Dprintf5(s,a,b,c,d,e) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL)
-#define Dprintf6(s,a,b,c,d,e,f) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL)
-#define Dprintf7(s,a,b,c,d,e,f,g) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL)
-
-#define DDprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
-#define DDprintf2(s,a,b) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
-
-/* Free list of RF_RaidReconDesc_t; created in rf_ConfigureReconstruction()
- * with the two constants below and destroyed in rf_ShutdownReconstruction(). */
-static RF_FreeList_t *rf_recond_freelist;
-#define RF_MAX_FREE_RECOND 4
-#define RF_RECOND_INC 1
-
-/* Forward declarations of the file-local helpers. */
-static RF_RaidReconDesc_t *
-AllocRaidReconDesc(RF_Raid_t * raidPtr,
- RF_RowCol_t row, RF_RowCol_t col, RF_RaidDisk_t * spareDiskPtr,
- int numDisksDone, RF_RowCol_t srow, RF_RowCol_t scol);
-static void FreeReconDesc(RF_RaidReconDesc_t * reconDesc);
-static int
-ProcessReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t frow,
- RF_ReconEvent_t * event);
-static int
-IssueNextReadRequest(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_RowCol_t col);
-static int TryToRead(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col);
-static int
-ComputePSDiskOffsets(RF_Raid_t * raidPtr, RF_StripeNum_t psid,
- RF_RowCol_t row, RF_RowCol_t col, RF_SectorNum_t * outDiskOffset,
- RF_SectorNum_t * outFailedDiskSectorOffset, RF_RowCol_t * spRow,
- RF_RowCol_t * spCol, RF_SectorNum_t * spOffset);
-static int IssueNextWriteRequest(RF_Raid_t * raidPtr, RF_RowCol_t row);
-static int ReconReadDoneProc(void *arg, int status);
-static int ReconWriteDoneProc(void *arg, int status);
-static void
-CheckForNewMinHeadSep(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_HeadSepLimit_t hsCtr);
-static int
-CheckHeadSeparation(RF_Raid_t * raidPtr, RF_PerDiskReconCtrl_t * ctrl,
- RF_RowCol_t row, RF_RowCol_t col, RF_HeadSepLimit_t hsCtr,
- RF_ReconUnitNum_t which_ru);
-static int
-CheckForcedOrBlockedReconstruction(RF_Raid_t * raidPtr,
- RF_ReconParityStripeStatus_t * pssPtr, RF_PerDiskReconCtrl_t * ctrl,
- RF_RowCol_t row, RF_RowCol_t col, RF_StripeNum_t psid,
- RF_ReconUnitNum_t which_ru);
-static void ForceReconReadDoneProc(void *arg, int status);
-
-static void rf_ShutdownReconstruction(void *);
-
-/* One registered "reconstruction done" callback; entries are chained on
- * raidPtr->recon_done_procs and invoked as proc(raidPtr, arg). */
-struct RF_ReconDoneProc_s {
- void (*proc) (RF_Raid_t *, void *);
- void *arg;
- RF_ReconDoneProc_t *next;
-};
-
-/* Free list of RF_ReconDoneProc_t; created in rf_ConfigureReconstruction()
- * with the two constants below and destroyed in rf_ShutdownReconstruction(). */
-static RF_FreeList_t *rf_rdp_freelist;
-#define RF_MAX_FREE_RDP 4
-#define RF_RDP_INC 1
-
-/*
- * Runs every callback registered on raidPtr->recon_done_procs, in list
- * order, while holding recon_done_proc_mutex.
- */
-static void
-SignalReconDone(RF_Raid_t * raidPtr)
-{
-	RF_ReconDoneProc_t *cb;
-
-	RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
-	cb = raidPtr->recon_done_procs;
-	while (cb != NULL) {
-		(*cb->proc) (raidPtr, cb->arg);
-		cb = cb->next;
-	}
-	RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
-}
-
-/*
- * Registers proc(raidPtr, arg) to be called when a reconstruction on
- * raidPtr finishes.  The new entry is pushed onto the head of the array's
- * callback list under recon_done_proc_mutex.  If handlep is not NULL the
- * entry is also stored through it.
- *
- * Returns 0 on success, ENOMEM if no entry could be obtained.
- */
-int
-rf_RegisterReconDoneProc(
-	RF_Raid_t * raidPtr,
-	void (*proc) (RF_Raid_t *, void *),
-	void *arg,
-	RF_ReconDoneProc_t ** handlep)
-{
-	RF_ReconDoneProc_t *entry;
-
-	RF_FREELIST_GET(rf_rdp_freelist, entry, next, (RF_ReconDoneProc_t *));
-	if (entry == NULL)
-		return (ENOMEM);
-
-	entry->arg = arg;
-	entry->proc = proc;
-
-	RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
-	entry->next = raidPtr->recon_done_procs;
-	raidPtr->recon_done_procs = entry;
-	RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
-
-	if (handlep != NULL)
-		*handlep = entry;
-	return (0);
-}
-/**************************************************************************
- *
- * sets up the parameters that will be used by the reconstruction process
- * currently there are none, except for those that the layout-specific
- * configuration (e.g. rf_ConfigureDeclustered) routine sets up.
- *
- * in the kernel, we fire off the recon thread.
- *
- **************************************************************************/
-/*
- * Shutdown hook: destroys the two free lists created by
- * rf_ConfigureReconstruction().  The argument is not used.
- */
-static void
-rf_ShutdownReconstruction(void *ignored)
-{
-	RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *));
-	RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *));
-}
-
-/*
- * Creates the free lists for reconstruction descriptors and for
- * reconstruction-done callbacks, then registers
- * rf_ShutdownReconstruction() on the shutdown list *listp.
- *
- * Returns 0 on success, ENOMEM if either free list cannot be created, or
- * the error from rf_ShutdownCreate().  Nothing is left allocated on
- * failure.
- */
-int
-rf_ConfigureReconstruction(listp)
-	RF_ShutdownList_t **listp;
-{
-	int status;
-
-	RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND,
-	    RF_RECOND_INC, sizeof(RF_RaidReconDesc_t));
-	if (rf_recond_freelist == NULL)
-		return (ENOMEM);
-
-	RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP,
-	    RF_RDP_INC, sizeof(RF_ReconDoneProc_t));
-	if (rf_rdp_freelist == NULL) {
-		/* undo the first list before giving up */
-		RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *));
-		return (ENOMEM);
-	}
-
-	status = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL);
-	if (status == 0)
-		return (0);
-
-	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
-	    __FILE__, __LINE__, status);
-	rf_ShutdownReconstruction(NULL);
-	return (status);
-}
-
-/*
- * Takes a reconstruction descriptor from rf_recond_freelist and records in
- * it the array, the failed disk (row, col), the spare disk (spareDiskPtr,
- * srow, scol) and numDisksDone.  state is set to 0 and next to NULL.
- *
- * Returns the initialized descriptor, or NULL if the free list could not
- * supply one.
- */
-static RF_RaidReconDesc_t *
-AllocRaidReconDesc(raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol)
-	RF_Raid_t *raidPtr;
-	RF_RowCol_t row;
-	RF_RowCol_t col;
-	RF_RaidDisk_t *spareDiskPtr;
-	int numDisksDone;
-	RF_RowCol_t srow;
-	RF_RowCol_t scol;
-{
-	RF_RaidReconDesc_t *reconDesc;
-
-	RF_FREELIST_GET(rf_recond_freelist, reconDesc, next, (RF_RaidReconDesc_t *));
-	/* RF_FREELIST_GET can come back empty; rf_RegisterReconDoneProc()
-	 * checks for this too.  Do not write through a NULL descriptor. */
-	if (reconDesc == NULL)
-		return (NULL);
-
-	reconDesc->raidPtr = raidPtr;
-	reconDesc->row = row;
-	reconDesc->col = col;
-	reconDesc->spareDiskPtr = spareDiskPtr;
-	reconDesc->numDisksDone = numDisksDone;
-	reconDesc->srow = srow;
-	reconDesc->scol = scol;
-	reconDesc->state = 0;
-	reconDesc->next = NULL;
-
-	return (reconDesc);
-}
-
-/*
- * Prints the statistics gathered in a reconstruction descriptor and returns
- * the descriptor to rf_recond_freelist.  The event-wait and delay counters
- * are only printed when RF_RECON_STATS > 0.
- */
-static void
-FreeReconDesc(reconDesc)
-	RF_RaidReconDesc_t *reconDesc;
-{
-	/* "%lu" takes an unsigned long, so cast to exactly that type */
-#if RF_RECON_STATS > 0
-	printf("RAIDframe: %lu recon event waits, %lu recon delays\n",
-	    (unsigned long) reconDesc->numReconEventWaits,
-	    (unsigned long) reconDesc->numReconExecDelays);
-#endif				/* RF_RECON_STATS > 0 */
-	printf("RAIDframe: %lu max exec ticks\n",
-	    (unsigned long) reconDesc->maxReconExecTicks);
-#if (RF_RECON_STATS > 0) || defined(KERNEL)
-	printf("\n");
-#endif				/* (RF_RECON_STATS > 0) || KERNEL */
-	RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next);
-}
-
-
-/*****************************************************************************
- *
- * Entry point for reconstructing the failed disk at (row, col) onto a spare.
- * Meant to run in a thread of its own: it blocks until the reconstruction
- * has completed, failed, or been aborted.
- *
- * Only one reconstruction per array may run at a time, so the call waits on
- * waitForReconCond until reconInProgress drops to zero.  Returns the result
- * of rf_ReconstructFailedDiskBasic(), or EIO when the layout provides no
- * SubmitReconBuffer method.
- *****************************************************************************/
-int
-rf_ReconstructFailedDisk(raidPtr, row, col)
-	RF_Raid_t *raidPtr;
-	RF_RowCol_t row;
-	RF_RowCol_t col;
-{
-	RF_LayoutSW_t *layout;
-	int status;
-
-	layout = raidPtr->Layout.map;
-	if (!layout->SubmitReconBuffer) {
-		RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n",
-		    layout->parityConfig);
-		status = EIO;
-	} else {
-		/* wait for our turn, then claim the array */
-		RF_LOCK_MUTEX(raidPtr->mutex);
-		while (raidPtr->reconInProgress) {
-			RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
-		}
-		raidPtr->reconInProgress++;
-		RF_UNLOCK_MUTEX(raidPtr->mutex);
-
-		status = rf_ReconstructFailedDiskBasic(raidPtr, row, col);
-
-		RF_LOCK_MUTEX(raidPtr->mutex);
-		raidPtr->reconInProgress--;
-		RF_UNLOCK_MUTEX(raidPtr->mutex);
-	}
-
-	/* let any waiting reconstruction proceed (both wakeup styles) */
-	RF_SIGNAL_COND(raidPtr->waitForReconCond);
-	wakeup(&raidPtr->waitForReconCond);
-	return (status);
-}
-
-/*
- * Reconstructs the failed disk at (row, col).
- *
- * For layouts with RF_DISTRIBUTE_SPARE the data is rebuilt into distributed
- * spare space (no spare disk; scol is -1).  Otherwise the first disk in
- * row 0 with status rf_ds_spare is claimed as the target.  The work itself
- * is done by rf_ContinueReconstructFailedDisk(); on success the parity is
- * marked clean and, when a spare disk was used, its component label is
- * rewritten to describe (row, col).
- *
- * Returns 0 on success, ENOMEM if a buffer or descriptor cannot be
- * allocated, EINVAL if a distributed-spare row is not degraded, ENOSPC if
- * no spare disk is available, or the result of
- * rf_ContinueReconstructFailedDisk().
- */
-int
-rf_ReconstructFailedDiskBasic(raidPtr, row, col)
-	RF_Raid_t *raidPtr;
-	RF_RowCol_t row;
-	RF_RowCol_t col;
-{
-	RF_ComponentLabel_t *c_label;
-	RF_RaidDisk_t *spareDiskPtr = NULL;
-	RF_RaidReconDesc_t *reconDesc;
-	RF_RowCol_t srow, scol;
-	int numDisksDone = 0, rc;
-
-	RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *));
-	if (c_label == NULL) {
-		printf("rf_ReconstructFailedDiskBasic: Out of memory?\n");
-		return (ENOMEM);
-	}
-
-	/* first look for a spare drive onto which to reconstruct the data */
-	/* spare disk descriptors are stored in row 0.  This may have to
-	 * change eventually */
-
-	RF_LOCK_MUTEX(raidPtr->mutex);
-	RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed);
-
-	if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
-		if (raidPtr->status[row] != rf_rs_degraded) {
-			RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because status not degraded\n", row, col);
-			RF_UNLOCK_MUTEX(raidPtr->mutex);
-			RF_Free(c_label, sizeof(RF_ComponentLabel_t));
-			return (EINVAL);
-		}
-		srow = row;
-		scol = (-1);	/* no spare disk: not a valid column index */
-	} else {
-		srow = 0;
-		for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->numSpare; scol++) {
-			if (raidPtr->Disks[srow][scol].status == rf_ds_spare) {
-				spareDiskPtr = &raidPtr->Disks[srow][scol];
-				spareDiskPtr->status = rf_ds_used_spare;
-				break;
-			}
-		}
-		if (!spareDiskPtr) {
-			RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because no spares are available\n", row, col);
-			RF_UNLOCK_MUTEX(raidPtr->mutex);
-			RF_Free(c_label, sizeof(RF_ComponentLabel_t));
-			return (ENOSPC);
-		}
-		printf("RECON: initiating reconstruction on row %d col %d -> spare at row %d col %d\n", row, col, srow, scol);
-	}
-	RF_UNLOCK_MUTEX(raidPtr->mutex);
-
-	reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol);
-	if (reconDesc == NULL) {
-		/* defensive: without a descriptor nothing can be started, so
-		 * hand the claimed spare back and report the failure */
-		if (spareDiskPtr != NULL) {
-			RF_LOCK_MUTEX(raidPtr->mutex);
-			spareDiskPtr->status = rf_ds_spare;
-			RF_UNLOCK_MUTEX(raidPtr->mutex);
-		}
-		RF_Free(c_label, sizeof(RF_ComponentLabel_t));
-		return (ENOMEM);
-	}
-	raidPtr->reconDesc = (void *) reconDesc;
-#if RF_RECON_STATS > 0
-	reconDesc->hsStallCount = 0;
-	reconDesc->numReconExecDelays = 0;
-	reconDesc->numReconEventWaits = 0;
-#endif				/* RF_RECON_STATS > 0 */
-	reconDesc->reconExecTimerRunning = 0;
-	reconDesc->reconExecTicks = 0;
-	reconDesc->maxReconExecTicks = 0;
-	rc = rf_ContinueReconstructFailedDisk(reconDesc);
-
-	if (!rc) {
-		/*
-		 * The component label lives on the spare disk.  With
-		 * distributed sparing there is no spare disk and scol is -1,
-		 * so raid_cinfo[srow][scol] and Disks[srow][scol] must not
-		 * be touched in that case.
-		 */
-		if (spareDiskPtr != NULL) {
-			/* fix up the component label */
-			/* Don't actually need the read here.. */
-			raidread_component_label(
-			    raidPtr->raid_cinfo[srow][scol].ci_dev,
-			    raidPtr->raid_cinfo[srow][scol].ci_vp,
-			    c_label);
-
-			raid_init_component_label(raidPtr, c_label);
-			c_label->row = row;
-			c_label->column = col;
-			c_label->clean = RF_RAID_DIRTY;
-			c_label->status = rf_ds_optimal;
-			c_label->partitionSize = raidPtr->Disks[srow][scol].partitionSize;
-		}
-
-		/* We've just done a rebuild based on all the other
-		 * disks, so at this point the parity is known to be
-		 * clean, even if it wasn't before. */
-
-		/* XXX doesn't hold for RAID 6!! */
-
-		raidPtr->parity_good = RF_RAID_CLEAN;
-
-		/* XXXX MORE NEEDED HERE */
-
-		if (spareDiskPtr != NULL) {
-			raidwrite_component_label(
-			    raidPtr->raid_cinfo[srow][scol].ci_dev,
-			    raidPtr->raid_cinfo[srow][scol].ci_vp,
-			    c_label);
-		}
-	}
-	RF_Free(c_label, sizeof(RF_ComponentLabel_t));
-	return (rc);
-}
-
-/*
-
- Allow reconstructing a disk in-place -- i.e. component /dev/sd2e goes AWOL,
- and you don't get a spare until the next Monday. With this function
- (and hot-swappable drives) you can now put your new disk containing
- /dev/sd2e on the bus, scsictl it alive, and then use raidctl(8) to
- rebuild the data "on the spot".
-
-*/
-
-/*
- * Rebuilds the component at (row, col) onto itself. The component is
- * marked failed if it was not already, its device is closed if open and
- * probed again with raid_getcomponentsize() (presumably re-opening it --
- * TODO confirm), and the slot is then treated as its own spare and rebuilt
- * by rf_ContinueReconstructFailedDisk(). On success the disk and its row
- * are marked optimal and the component label is rewritten.
- *
- * Returns 0 on success; ENOMEM if the label buffer cannot be allocated;
- * EINVAL if this disk is optimal while another component has failed, if
- * this disk is already being reconstructed, or if the layout uses
- * RF_DISTRIBUTE_SPARE; the error from raid_getcomponentsize(); or EIO if
- * the layout has no SubmitReconBuffer method.
- */
-int
-rf_ReconstructInPlace(raidPtr, row, col)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
- RF_RowCol_t col;
-{
- RF_RaidDisk_t *spareDiskPtr = NULL;
- RF_RaidReconDesc_t *reconDesc;
- RF_LayoutSW_t *lp;
- RF_RaidDisk_t *badDisk;
- RF_ComponentLabel_t *c_label;
- int numDisksDone = 0, rc;
- struct vnode *vp;
- int retcode;
- int ac;
-
- RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *));
- if (c_label == NULL) {
- printf("rf_ReconstructInPlace: Out of memory?\n");
- return (ENOMEM);
- }
-
- lp = raidPtr->Layout.map;
- if (lp->SubmitReconBuffer) {
- /*
- * The current infrastructure only supports reconstructing one
- * disk at a time for each array.
- */
- RF_LOCK_MUTEX(raidPtr->mutex);
- if ((raidPtr->Disks[row][col].status == rf_ds_optimal) &&
- (raidPtr->numFailures > 0)) {
- /* XXX 0 above shouldn't be constant!!! */
- /* some component other than this has failed.
- Let's not make things worse than they already
- are... */
- printf("RAIDFRAME: Unable to reconstruct to disk at:\n");
- printf(" Row: %d Col: %d Too many failures.\n",
- row, col);
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- RF_Free(c_label, sizeof(RF_ComponentLabel_t));
- return (EINVAL);
- }
- if (raidPtr->Disks[row][col].status == rf_ds_reconstructing) {
- printf("RAIDFRAME: Unable to reconstruct to disk at:\n");
- printf(" Row: %d Col: %d Reconstruction already occuring!\n", row, col);
-
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- RF_Free(c_label, sizeof(RF_ComponentLabel_t));
- return (EINVAL);
- }
-
-
- if (raidPtr->Disks[row][col].status != rf_ds_failed) {
- /* "It's gone..." */
- raidPtr->numFailures++;
- raidPtr->Disks[row][col].status = rf_ds_failed;
- raidPtr->status[row] = rf_rs_degraded;
- /* NOTE(review): called with raidPtr->mutex held -- confirm
-  * that rf_update_component_labels() may run under it. */
- rf_update_component_labels(raidPtr,
- RF_NORMAL_COMPONENT_UPDATE);
- }
-
- /* wait for any reconstruction already running on this array */
- while (raidPtr->reconInProgress) {
- RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
- }
-
- raidPtr->reconInProgress++;
-
-
- /* first look for a spare drive onto which to reconstruct
- the data. spare disk descriptors are stored in row 0.
- This may have to change eventually */
-
- /* Actually, we don't care if it's failed or not...
- On a RAID set with correct parity, this function
- should be callable on any component without ill effects. */
- /* RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed);
- */
-
- if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
- RF_ERRORMSG2("Unable to reconstruct to disk at row %d col %d: operation not supported for RF_DISTRIBUTE_SPARE\n", row, col);
-
- raidPtr->reconInProgress--;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- RF_Free(c_label, sizeof(RF_ComponentLabel_t));
- return (EINVAL);
- }
-
- /* XXX need goop here to see if the disk is alive,
- and, if not, make it so... */
-
-
-
- /* NOTE(review): badDisk is assigned here but never read in this
-  * function. */
- badDisk = &raidPtr->Disks[row][col];
-
- /* This device may have been opened successfully the
- first time. Close it before trying to open it again.. */
-
- if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) {
- printf("Closed the open device: %s\n",
- raidPtr->Disks[row][col].devname);
- vp = raidPtr->raid_cinfo[row][col].ci_vp;
- ac = raidPtr->Disks[row][col].auto_configured;
- rf_close_component(raidPtr, vp, ac);
- raidPtr->raid_cinfo[row][col].ci_vp = NULL;
- }
- /* note that this disk was *not* auto_configured (any longer)*/
- raidPtr->Disks[row][col].auto_configured = 0;
-
- printf("About to (re-)open the device for rebuilding: %s\n",
- raidPtr->Disks[row][col].devname);
-
- retcode = raid_getcomponentsize(raidPtr, row, col);
-
- if (retcode) {
- printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n",
- raidPtr->raidid, raidPtr->Disks[row][col].devname,
- retcode);
-
- /* XXX the component isn't responding properly...
- must be still dead :-( */
- raidPtr->reconInProgress--;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- RF_Free(c_label, sizeof(RF_ComponentLabel_t));
- return(retcode);
-
- }
-
- /* the component being rebuilt acts as its own spare */
- spareDiskPtr = &raidPtr->Disks[row][col];
- spareDiskPtr->status = rf_ds_used_spare;
-
- printf("RECON: initiating in-place reconstruction on\n");
- printf(" row %d col %d -> spare at row %d col %d\n",
- row, col, row, col);
-
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- /* NOTE(review): the descriptor is used below without a NULL check. */
- reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col,
- spareDiskPtr, numDisksDone,
- row, col);
- raidPtr->reconDesc = (void *) reconDesc;
-#if RF_RECON_STATS > 0
- reconDesc->hsStallCount = 0;
- reconDesc->numReconExecDelays = 0;
- reconDesc->numReconEventWaits = 0;
-#endif /* RF_RECON_STATS > 0 */
- reconDesc->reconExecTimerRunning = 0;
- reconDesc->reconExecTicks = 0;
- reconDesc->maxReconExecTicks = 0;
- rc = rf_ContinueReconstructFailedDisk(reconDesc);
-
- RF_LOCK_MUTEX(raidPtr->mutex);
- raidPtr->reconInProgress--;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- } else {
- RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n",
- lp->parityConfig);
- rc = EIO;
- }
- /* NOTE(review): the label read/write below runs with raidPtr->mutex
-  * held -- confirm that this is acceptable for this mutex type. */
- RF_LOCK_MUTEX(raidPtr->mutex);
-
- if (!rc) {
- /* Need to set these here, as at this point it'll be claiming
- that the disk is in rf_ds_spared! But we know better :-) */
-
- raidPtr->Disks[row][col].status = rf_ds_optimal;
- raidPtr->status[row] = rf_rs_optimal;
-
- /* fix up the component label */
- /* Don't actually need the read here.. */
- raidread_component_label(raidPtr->raid_cinfo[row][col].ci_dev,
- raidPtr->raid_cinfo[row][col].ci_vp,
- c_label);
-
- raid_init_component_label(raidPtr, c_label);
-
- c_label->row = row;
- c_label->column = col;
-
- /* We've just done a rebuild based on all the other
- disks, so at this point the parity is known to be
- clean, even if it wasn't before. */
-
- /* XXX doesn't hold for RAID 6!! */
-
- raidPtr->parity_good = RF_RAID_CLEAN;
-
- raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev,
- raidPtr->raid_cinfo[row][col].ci_vp,
- c_label);
-
- }
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- /* let any reconstruction waiting on this array proceed */
- RF_SIGNAL_COND(raidPtr->waitForReconCond);
- wakeup(&raidPtr->waitForReconCond);
- RF_Free(c_label, sizeof(RF_ComponentLabel_t));
- return (rc);
-}
-
-
-/*
- * Runs the reconstruction described by reconDesc as a state machine keyed
- * on reconDesc->state. The switch cases fall through, so a call entering
- * at state 0 proceeds through every later state:
- *
- *   0  reset the XOR timer, allocate trace records, quiesce the array
- *   1  create the recon control structure, mark the row and disk as
- *      reconstructing, issue the first read on every surviving column
- *   2  resume user requests
- *   3  process events until all numCol - 1 surviving disks are done
- *   4  process events until no reconstruction units are left
- *   5  quiesce the array again, stop and print the user statistics
- *   6  decrement numFailures, mark the disk spared, take the end time
- *   7  resume user requests, print timings, free the recon structures
- *
- * The function passes itself to rf_GetNextReconEvent() together with
- * reconDesc, presumably so that it can be re-entered at the saved state --
- * TODO confirm against rf_GetNextReconEvent().
- *
- * Calls SignalReconDone() and returns 0 in every case.
- */
-int
-rf_ContinueReconstructFailedDisk(reconDesc)
- RF_RaidReconDesc_t *reconDesc;
-{
- RF_Raid_t *raidPtr = reconDesc->raidPtr;
- RF_RowCol_t row = reconDesc->row;
- RF_RowCol_t col = reconDesc->col;
- RF_RowCol_t srow = reconDesc->srow;
- RF_RowCol_t scol = reconDesc->scol;
- RF_ReconMap_t *mapPtr;
-
- RF_ReconEvent_t *event;
- struct timeval etime, elpsd;
- unsigned long xor_s, xor_resid_us;
- int retcode, i, ds;
-
- switch (reconDesc->state) {
-
-
- case 0:
-
- raidPtr->accumXorTimeUs = 0;
-
- /* create one trace record per physical disk */
- /* NOTE(review): the result of this allocation is not checked. */
- RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
-
- /* quiesce the array prior to starting recon. this is needed
- * to assure no nasty interactions with pending user writes.
- * We need to do this before we change the disk or row status. */
- reconDesc->state = 1;
-
- Dprintf("RECON: begin request suspend\n");
- /* NOTE(review): retcode is assigned here and in state 5 but never
-  * read. */
- retcode = rf_SuspendNewRequestsAndWait(raidPtr);
- Dprintf("RECON: end request suspend\n");
- rf_StartUserStats(raidPtr); /* zero out the stats kept on
- * user accs */
-
- /* fall through to state 1 */
-
- case 1:
-
- RF_LOCK_MUTEX(raidPtr->mutex);
-
- /* create the reconstruction control pointer and install it in
- * the right slot */
- raidPtr->reconControl[row] = rf_MakeReconControl(reconDesc, row, col, srow, scol);
- mapPtr = raidPtr->reconControl[row]->reconMap;
- raidPtr->status[row] = rf_rs_reconstructing;
- raidPtr->Disks[row][col].status = rf_ds_reconstructing;
- raidPtr->Disks[row][col].spareRow = srow;
- raidPtr->Disks[row][col].spareCol = scol;
-
- RF_UNLOCK_MUTEX(raidPtr->mutex);
-
- RF_GETTIME(raidPtr->reconControl[row]->starttime);
-
- /* now start up the actual reconstruction: issue a read for
- * each surviving disk */
-
- reconDesc->numDisksDone = 0;
- for (i = 0; i < raidPtr->numCol; i++) {
- if (i != col) {
- /* find and issue the next I/O on the
- * indicated disk */
- if (IssueNextReadRequest(raidPtr, row, i)) {
- Dprintf2("RECON: done issuing for r%d c%d\n", row, i);
- reconDesc->numDisksDone++;
- }
- }
- }
-
- /* falls through to state 2; NOTE(review): state is still 1 here and
-  * is only advanced (to 3) inside case 2 */
- case 2:
- Dprintf("RECON: resume requests\n");
- rf_ResumeNewRequests(raidPtr);
-
-
- reconDesc->state = 3;
-
- /* fall through to state 3 */
- case 3:
-
- /* process reconstruction events until all disks report that
- * they've completed all work */
- mapPtr = raidPtr->reconControl[row]->reconMap;
-
-
-
- while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
-
- event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc);
- RF_ASSERT(event);
-
- if (ProcessReconEvent(raidPtr, row, event))
- reconDesc->numDisksDone++;
- raidPtr->reconControl[row]->numRUsTotal =
- mapPtr->totalRUs;
- raidPtr->reconControl[row]->numRUsComplete =
- mapPtr->totalRUs -
- rf_UnitsLeftToReconstruct(mapPtr);
-
- /* NOTE(review): divides by numRUsTotal (== mapPtr->totalRUs);
-  * nothing visible here rules out a value of zero. */
- raidPtr->reconControl[row]->percentComplete =
- (raidPtr->reconControl[row]->numRUsComplete * 100 / raidPtr->reconControl[row]->numRUsTotal);
- if (rf_prReconSched) {
- rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime));
- }
- }
-
-
-
- reconDesc->state = 4;
-
-
- /* fall through to state 4 */
- case 4:
- mapPtr = raidPtr->reconControl[row]->reconMap;
- if (rf_reconDebug) {
- printf("RECON: all reads completed\n");
- }
- /* at this point all the reads have completed. We now wait
- * for any pending writes to complete, and then we're done */
-
- while (rf_UnitsLeftToReconstruct(raidPtr->reconControl[row]->reconMap) > 0) {
-
- event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc);
- RF_ASSERT(event);
-
- (void) ProcessReconEvent(raidPtr, row, event); /* ignore return code */
- raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);
- if (rf_prReconSched) {
- rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime));
- }
- }
- reconDesc->state = 5;
-
- /* fall through to state 5 */
- case 5:
- /* Success: mark the dead disk as reconstructed. We quiesce
- * the array here to assure no nasty interactions with pending
- * user accesses when we free up the psstatus structure as
- * part of FreeReconControl() */
-
- reconDesc->state = 6;
-
- retcode = rf_SuspendNewRequestsAndWait(raidPtr);
- rf_StopUserStats(raidPtr);
- rf_PrintUserStats(raidPtr); /* print out the stats on user
- * accs accumulated during
- * recon */
-
- /* fall through to state 6 */
- case 6:
-
-
-
- RF_LOCK_MUTEX(raidPtr->mutex);
- raidPtr->numFailures--;
- ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE);
- raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared;
- raidPtr->status[row] = (ds) ? rf_rs_reconfigured : rf_rs_optimal;
- RF_UNLOCK_MUTEX(raidPtr->mutex);
- RF_GETTIME(etime);
- RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), &etime, &elpsd);
-
- /* XXX -- why is state 7 different from state 6 if there is no
- * return() here? -- XXX Note that I set elpsd above & use it
- * below, so if you put a return here you'll have to fix this.
- * (also, FreeReconControl is called below) */
-
- case 7:
-
- rf_ResumeNewRequests(raidPtr);
-
- printf("Reconstruction of disk at row %d col %d completed\n",
- row, col);
- xor_s = raidPtr->accumXorTimeUs / 1000000;
- xor_resid_us = raidPtr->accumXorTimeUs % 1000000;
- /* NOTE(review): xor_s and xor_resid_us are unsigned long but are
-  * printed with "%ld"; the type of accumXorTimeUs is not visible
-  * here. */
- printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n",
- (int) elpsd.tv_sec, (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s, xor_resid_us);
- printf(" (start time %d sec %d usec, end time %d sec %d usec)\n",
- (int) raidPtr->reconControl[row]->starttime.tv_sec,
- (int) raidPtr->reconControl[row]->starttime.tv_usec,
- (int) etime.tv_sec, (int) etime.tv_usec);
-
-#if RF_RECON_STATS > 0
- printf("Total head-sep stall count was %d\n",
- (int) reconDesc->hsStallCount);
-#endif /* RF_RECON_STATS > 0 */
- /* release everything the earlier states set up; reconDesc must not
-  * be used after FreeReconDesc() */
- rf_FreeReconControl(raidPtr, row);
- RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t));
- FreeReconDesc(reconDesc);
-
- }
-
- SignalReconDone(raidPtr);
- return (0);
-}
-/*****************************************************************************
- * do the right thing upon each reconstruction event.
- * returns nonzero if and only if there is nothing left unread on the
- * indicated disk
- *****************************************************************************/
-static int
-ProcessReconEvent(raidPtr, frow, event)
- RF_Raid_t *raidPtr;
- RF_RowCol_t frow;
- RF_ReconEvent_t *event;
-{
- int retcode = 0, submitblocked;
- RF_ReconBuffer_t *rbuf;
- RF_SectorCount_t sectorsPerRU;
-
- Dprintf1("RECON: ProcessReconEvent type %d\n", event->type);
- switch (event->type) {
-
- /* a read I/O has completed */
- case RF_REVENT_READDONE:
- rbuf = raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf;
- Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld\n",
- frow, event->col, rbuf->parityStripeID);
- Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x %02x %02x\n",
- rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff,
- rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff);
- rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
- submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
- Dprintf1("RECON: submitblocked=%d\n", submitblocked);
- if (!submitblocked)
- retcode = IssueNextReadRequest(raidPtr, frow, event->col);
- break;
-
- /* a write I/O has completed */
- case RF_REVENT_WRITEDONE:
- if (rf_floatingRbufDebug) {
- rf_CheckFloatingRbufCount(raidPtr, 1);
- }
- sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU;
- rbuf = (RF_ReconBuffer_t *) event->arg;
- rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
- Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d (%d %% complete)\n",
- rbuf->parityStripeID, rbuf->which_ru, raidPtr->reconControl[frow]->percentComplete);
- rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]->reconMap,
- rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU - 1);
- rf_RemoveFromActiveReconTable(raidPtr, frow, rbuf->parityStripeID, rbuf->which_ru);
-
- if (rbuf->type == RF_RBUF_TYPE_FLOATING) {
- RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
- raidPtr->numFullReconBuffers--;
- rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf);
- RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
- } else
- if (rbuf->type == RF_RBUF_TYPE_FORCED)
- rf_FreeReconBuffer(rbuf);
- else
- RF_ASSERT(0);
- break;
-
- case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been
- * cleared */
- Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d\n", frow, event->col);
- submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, (int) (long) event->arg);
- RF_ASSERT(!submitblocked); /* we wouldn't have gotten the
- * BUFCLEAR event if we
- * couldn't submit */
- retcode = IssueNextReadRequest(raidPtr, frow, event->col);
- break;
-
- case RF_REVENT_BLOCKCLEAR: /* A user-write reconstruction
- * blockage has been cleared */
- DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d\n", frow, event->col);
- retcode = TryToRead(raidPtr, frow, event->col);
- break;
-
- case RF_REVENT_HEADSEPCLEAR: /* A max-head-separation
- * reconstruction blockage has been
- * cleared */
- Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d\n", frow, event->col);
- retcode = TryToRead(raidPtr, frow, event->col);
- break;
-
- /* a buffer has become ready to write */
- case RF_REVENT_BUFREADY:
- Dprintf2("RECON: BUFREADY EVENT: row %d col %d\n", frow, event->col);
- retcode = IssueNextWriteRequest(raidPtr, frow);
- if (rf_floatingRbufDebug) {
- rf_CheckFloatingRbufCount(raidPtr, 1);
- }
- break;
-
- /* we need to skip the current RU entirely because it got
- * recon'd while we were waiting for something else to happen */
- case RF_REVENT_SKIP:
- DDprintf2("RECON: SKIP EVENT: row %d col %d\n", frow, event->col);
- retcode = IssueNextReadRequest(raidPtr, frow, event->col);
- break;
-
- /* a forced-reconstruction read access has completed. Just
- * submit the buffer */
- case RF_REVENT_FORCEDREADDONE:
- rbuf = (RF_ReconBuffer_t *) event->arg;
- rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
- DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d\n", frow, event->col);
- submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
- RF_ASSERT(!submitblocked);
- break;
-
- default:
- RF_PANIC();
- }
- rf_FreeReconEventDesc(event);
- return (retcode);
-}
-/*****************************************************************************
- *
- * find the next thing that's needed on the indicated disk, and issue
- * a read request for it. We assume that the reconstruction buffer
- * associated with this process is free to receive the data. If
- * reconstruction is blocked on the indicated RU, we issue a
- * blockage-release request instead of a physical disk read request.
- * If the current disk gets too far ahead of the others, we issue a
- * head-separation wait request and return.
- *
- * ctrl->{ru_count, curPSID, diskOffset} and
- * rbuf->failedDiskSectorOffset are maintained to point to the unit
- * we're currently accessing. Note that this deviates from the
- * standard C idiom of having counters point to the next thing to be
- * accessed. This allows us to easily retry when we're blocked by
- * head separation or reconstruction-blockage events.
- *
- * returns nonzero if and only if there is nothing left unread on the
- * indicated disk
- *
- *****************************************************************************/
-static int
-IssueNextReadRequest(raidPtr, row, col)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
- RF_RowCol_t col;
-{
- RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col];
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_ReconBuffer_t *rbuf = ctrl->rbuf;
- RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU;
- RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
- int do_new_check = 0, retcode = 0, status;
-
- /* if we are currently the slowest disk, mark that we have to do a new
- * check */
- if (ctrl->headSepCounter <= raidPtr->reconControl[row]->minHeadSepCounter)
- do_new_check = 1;
-
- while (1) {
-
- ctrl->ru_count++;
- if (ctrl->ru_count < RUsPerPU) {
- ctrl->diskOffset += sectorsPerRU;
- rbuf->failedDiskSectorOffset += sectorsPerRU;
- } else {
- ctrl->curPSID++;
- ctrl->ru_count = 0;
- /* code left over from when head-sep was based on
- * parity stripe id */
- if (ctrl->curPSID >= raidPtr->reconControl[row]->lastPSID) {
- CheckForNewMinHeadSep(raidPtr, row, ++(ctrl->headSepCounter));
- return (1); /* finito! */
- }
- /* find the disk offsets of the start of the parity
- * stripe on both the current disk and the failed
- * disk. skip this entire parity stripe if either disk
- * does not appear in the indicated PS */
- status = ComputePSDiskOffsets(raidPtr, ctrl->curPSID, row, col, &ctrl->diskOffset, &rbuf->failedDiskSectorOffset,
- &rbuf->spRow, &rbuf->spCol, &rbuf->spOffset);
- if (status) {
- ctrl->ru_count = RUsPerPU - 1;
- continue;
- }
- }
- rbuf->which_ru = ctrl->ru_count;
-
- /* skip this RU if it's already been reconstructed */
- if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, rbuf->failedDiskSectorOffset)) {
- Dprintf2("Skipping psid %ld ru %d: already reconstructed\n", ctrl->curPSID, ctrl->ru_count);
- continue;
- }
- break;
- }
- ctrl->headSepCounter++;
- if (do_new_check)
- CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); /* update min if needed */
-
-
- /* at this point, we have definitely decided what to do, and we have
- * only to see if we can actually do it now */
- rbuf->parityStripeID = ctrl->curPSID;
- rbuf->which_ru = ctrl->ru_count;
- bzero((char *) &raidPtr->recon_tracerecs[col], sizeof(raidPtr->recon_tracerecs[col]));
- raidPtr->recon_tracerecs[col].reconacc = 1;
- RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);
- retcode = TryToRead(raidPtr, row, col);
- return (retcode);
-}
-
-/*
- * tries to issue the next read on the indicated disk. We may be
- * blocked by (a) the heads being too far apart, or (b) recon on the
- * indicated RU being blocked due to a write by a user thread. In
- * this case, we issue a head-sep or blockage wait request, which will
- * cause this same routine to be invoked again later when the blockage
- * has cleared.
- */
-
-static int
-TryToRead(raidPtr, row, col)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
- RF_RowCol_t col;
-{
- RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col];
- RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU;
- RF_StripeNum_t psid = ctrl->curPSID;
- RF_ReconUnitNum_t which_ru = ctrl->ru_count;
- RF_DiskQueueData_t *req;
- int status, created = 0;
- RF_ReconParityStripeStatus_t *pssPtr;
-
- /* if the current disk is too far ahead of the others, issue a
- * head-separation wait and return */
- if (CheckHeadSeparation(raidPtr, ctrl, row, col, ctrl->headSepCounter, which_ru))
- return (0);
- RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
- pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE, &created);
-
- /* if recon is blocked on the indicated parity stripe, issue a
- * block-wait request and return. this also must mark the indicated RU
- * in the stripe as under reconstruction if not blocked. */
- status = CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl, row, col, psid, which_ru);
- if (status == RF_PSS_RECON_BLOCKED) {
- Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked\n", psid, which_ru);
- goto out;
- } else
- if (status == RF_PSS_FORCED_ON_WRITE) {
- rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP);
- goto out;
- }
- /* make one last check to be sure that the indicated RU didn't get
- * reconstructed while we were waiting for something else to happen.
- * This is unfortunate in that it causes us to make this check twice
- * in the normal case. Might want to make some attempt to re-work
- * this so that we only do this check if we've definitely blocked on
- * one of the above checks. When this condition is detected, we may
- * have just created a bogus status entry, which we need to delete. */
- if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, ctrl->rbuf->failedDiskSectorOffset)) {
- Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after stall\n", psid, which_ru);
- if (created)
- rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr);
- rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP);
- goto out;
- }
- /* found something to read. issue the I/O */
- Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld buf %lx\n",
- psid, row, col, ctrl->diskOffset, ctrl->rbuf->buffer);
- RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer);
- RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer);
- raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us =
- RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer);
- RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);
-
- /* should be ok to use a NULL proc pointer here, all the bufs we use
- * should be in kernel space */
- req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset, sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru,
- ReconReadDoneProc, (void *) ctrl, NULL, &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL);
-
- RF_ASSERT(req); /* XXX -- fix this -- XXX */
-
- ctrl->rbuf->arg = (void *) req;
- rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY);
- pssPtr->issued[col] = 1;
-
-out:
- RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
- return (0);
-}
-
-
-/*
- * given a parity stripe ID, we want to find out whether both the
- * current disk and the failed disk exist in that parity stripe. If
- * not, we want to skip this whole PS. If so, we want to find the
- * disk offset of the start of the PS on both the current disk and the
- * failed disk.
- *
- * this works by getting a list of disks comprising the indicated
- * parity stripe, and searching the list for the current and failed
- * disks. Once we've decided they both exist in the parity stripe, we
- * need to decide whether each is data or parity, so that we'll know
- * which mapping function to call to get the corresponding disk
- * offsets.
- *
- * this is kind of unpleasant, but doing it this way allows the
- * reconstruction code to use parity stripe IDs rather than physical
- * disks address to march through the failed disk, which greatly
- * simplifies a lot of code, as well as eliminating the need for a
- * reverse-mapping function. I also think it will execute faster,
- * since the calls to the mapping module are kept to a minimum.
- *
- * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING
- * THE STRIPE IN THE CORRECT ORDER */
-
-
-static int
-ComputePSDiskOffsets(
- RF_Raid_t * raidPtr, /* raid descriptor */
- RF_StripeNum_t psid, /* parity stripe identifier */
- RF_RowCol_t row, /* row and column of disk to find the offsets
- * for */
- RF_RowCol_t col,
- RF_SectorNum_t * outDiskOffset,
- RF_SectorNum_t * outFailedDiskSectorOffset,
- RF_RowCol_t * spRow, /* OUT: row,col of spare unit for failed unit */
- RF_RowCol_t * spCol,
- RF_SectorNum_t * spOffset)
-{ /* OUT: offset into disk containing spare unit */
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
- RF_RaidAddr_t sosRaidAddress; /* start-of-stripe */
- RF_RowCol_t *diskids;
- u_int i, j, k, i_offset, j_offset;
- RF_RowCol_t prow, pcol;
- int testcol, testrow;
- RF_RowCol_t stripe;
- RF_SectorNum_t poffset;
- char i_is_parity = 0, j_is_parity = 0;
- RF_RowCol_t stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
-
- /* get a listing of the disks comprising that stripe */
- sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid);
- (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids, &stripe);
- RF_ASSERT(diskids);
-
- /* reject this entire parity stripe if it does not contain the
- * indicated disk or it does not contain the failed disk */
- if (row != stripe)
- goto skipit;
- for (i = 0; i < stripeWidth; i++) {
- if (col == diskids[i])
- break;
- }
- if (i == stripeWidth)
- goto skipit;
- for (j = 0; j < stripeWidth; j++) {
- if (fcol == diskids[j])
- break;
- }
- if (j == stripeWidth) {
- goto skipit;
- }
- /* find out which disk the parity is on */
- (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol, &poffset, RF_DONT_REMAP);
-
- /* find out if either the current RU or the failed RU is parity */
- /* also, if the parity occurs in this stripe prior to the data and/or
- * failed col, we need to decrement i and/or j */
- for (k = 0; k < stripeWidth; k++)
- if (diskids[k] == pcol)
- break;
- RF_ASSERT(k < stripeWidth);
- i_offset = i;
- j_offset = j;
- if (k < i)
- i_offset--;
- else
- if (k == i) {
- i_is_parity = 1;
- i_offset = 0;
- } /* set offsets to zero to disable multiply
- * below */
- if (k < j)
- j_offset--;
- else
- if (k == j) {
- j_is_parity = 1;
- j_offset = 0;
- }
- /* at this point, [ij]_is_parity tells us whether the [current,failed]
- * disk is parity at the start of this RU, and, if data, "[ij]_offset"
- * tells us how far into the stripe the [current,failed] disk is. */
-
- /* call the mapping routine to get the offset into the current disk,
- * repeat for failed disk. */
- if (i_is_parity)
- layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP);
- else
- layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP);
-
- RF_ASSERT(row == testrow && col == testcol);
-
- if (j_is_parity)
- layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP);
- else
- layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP);
- RF_ASSERT(row == testrow && fcol == testcol);
-
- /* now locate the spare unit for the failed unit */
- if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
- if (j_is_parity)
- layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP);
- else
- layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP);
- } else {
- *spRow = raidPtr->reconControl[row]->spareRow;
- *spCol = raidPtr->reconControl[row]->spareCol;
- *spOffset = *outFailedDiskSectorOffset;
- }
-
- return (0);
-
-skipit:
- Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d\n",
- psid, row, col);
- return (1);
-}
-/* this is called when a buffer has become ready to write to the replacement disk */
-static int
-IssueNextWriteRequest(raidPtr, row)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
-{
- RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
- RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
- RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
- RF_ReconBuffer_t *rbuf;
- RF_DiskQueueData_t *req;
-
- rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]);
- RF_ASSERT(rbuf); /* there must be one available, or we wouldn't
- * have gotten the event that sent us here */
- RF_ASSERT(rbuf->pssPtr);
-
- rbuf->pssPtr->writeRbuf = rbuf;
- rbuf->pssPtr = NULL;
-
- Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d (failed disk offset %ld) buf %lx\n",
- rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID,
- rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer);
- Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x\n",
- rbuf->parityStripeID, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff,
- rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff);
-
- /* should be ok to use a NULL b_proc here b/c all addrs should be in
- * kernel space */
- req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset,
- sectorsPerRU, rbuf->buffer,
- rbuf->parityStripeID, rbuf->which_ru,
- ReconWriteDoneProc, (void *) rbuf, NULL,
- &raidPtr->recon_tracerecs[fcol],
- (void *) raidPtr, 0, NULL);
-
- RF_ASSERT(req); /* XXX -- fix this -- XXX */
-
- rbuf->arg = (void *) req;
- rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, RF_IO_RECON_PRIORITY);
-
- return (0);
-}
-
-/*
- * this gets called upon the completion of a reconstruction read
- * operation the arg is a pointer to the per-disk reconstruction
- * control structure for the process that just finished a read.
- *
- * called at interrupt context in the kernel, so don't do anything
- * illegal here.
- */
-static int
-ReconReadDoneProc(arg, status)
- void *arg;
- int status;
-{
- RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg;
- RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;
-
- if (status) {
- /*
- * XXX
- */
- printf("Recon read failed!\n");
- RF_PANIC();
- }
- RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
- RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
- raidPtr->recon_tracerecs[ctrl->col].specific.recon.recon_fetch_to_return_us =
- RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
- RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
-
- rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, RF_REVENT_READDONE);
- return (0);
-}
-/* this gets called upon the completion of a reconstruction write operation.
- * the arg is a pointer to the rbuf that was just written
- *
- * called at interrupt context in the kernel, so don't do anything illegal here.
- */
-static int
-ReconWriteDoneProc(arg, status)
- void *arg;
- int status;
-{
- RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg;
-
- Dprintf2("Reconstruction completed on psid %ld ru %d\n", rbuf->parityStripeID, rbuf->which_ru);
- if (status) {
- printf("Recon write failed!\n"); /* fprintf(stderr,"Recon
- * write failed!\n"); */
- RF_PANIC();
- }
- rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, arg, RF_REVENT_WRITEDONE);
- return (0);
-}
-
-
-/*
- * computes a new minimum head sep, and wakes up anyone who needs to
- * be woken as a result
- */
-static void
-CheckForNewMinHeadSep(raidPtr, row, hsCtr)
- RF_Raid_t *raidPtr;
- RF_RowCol_t row;
- RF_HeadSepLimit_t hsCtr;
-{
- RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
- RF_HeadSepLimit_t new_min;
- RF_RowCol_t i;
- RF_CallbackDesc_t *p;
- RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter); /* from the definition
- * of a minimum */
-
-
- RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
-
- new_min = ~(1L << (8 * sizeof(long) - 1)); /* 0x7FFF....FFF */
- for (i = 0; i < raidPtr->numCol; i++)
- if (i != reconCtrlPtr->fcol) {
- if (reconCtrlPtr->perDiskInfo[i].headSepCounter < new_min)
- new_min = reconCtrlPtr->perDiskInfo[i].headSepCounter;
- }
- /* set the new minimum and wake up anyone who can now run again */
- if (new_min != reconCtrlPtr->minHeadSepCounter) {
- reconCtrlPtr->minHeadSepCounter = new_min;
- Dprintf1("RECON: new min head pos counter val is %ld\n", new_min);
- while (reconCtrlPtr->headSepCBList) {
- if (reconCtrlPtr->headSepCBList->callbackArg.v > new_min)
- break;
- p = reconCtrlPtr->headSepCBList;
- reconCtrlPtr->headSepCBList = p->next;
- p->next = NULL;
- rf_CauseReconEvent(raidPtr, p->row, p->col, NULL, RF_REVENT_HEADSEPCLEAR);
- rf_FreeCallbackDesc(p);
- }
-
- }
- RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
-}
-
-/*
- * checks to see that the maximum head separation will not be violated
- * if we initiate a reconstruction I/O on the indicated disk.
- * Limiting the maximum head separation between two disks eliminates
- * the nasty buffer-stall conditions that occur when one disk races
- * ahead of the others and consumes all of the floating recon buffers.
- * This code is complex and unpleasant but it's necessary to avoid
- * some very nasty, albeit fairly rare, reconstruction behavior.
- *
- * returns non-zero if and only if we have to stop working on the
- * indicated disk due to a head-separation delay.
- */
-static int
-CheckHeadSeparation(
- RF_Raid_t * raidPtr,
- RF_PerDiskReconCtrl_t * ctrl,
- RF_RowCol_t row,
- RF_RowCol_t col,
- RF_HeadSepLimit_t hsCtr,
- RF_ReconUnitNum_t which_ru)
-{
- RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
- RF_CallbackDesc_t *cb, *p, *pt;
- int retval = 0;
-
- /* if we're too far ahead of the slowest disk, stop working on this
- * disk until the slower ones catch up. We do this by scheduling a
- * wakeup callback for the time when the slowest disk has caught up.
- * We define "caught up" with 20% hysteresis, i.e. the head separation
- * must have fallen to at most 80% of the max allowable head
- * separation before we'll wake up.
- *
- */
- RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
- if ((raidPtr->headSepLimit >= 0) &&
- ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) > raidPtr->headSepLimit)) {
- Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr %ld minHSCtr %ld limit %ld\n",
- raidPtr->raidid, row, col, ctrl->headSepCounter,
- reconCtrlPtr->minHeadSepCounter,
- raidPtr->headSepLimit);
- cb = rf_AllocCallbackDesc();
- /* the minHeadSepCounter value we have to get to before we'll
- * wake up. build in 20% hysteresis. */
- cb->callbackArg.v = (ctrl->headSepCounter - raidPtr->headSepLimit + raidPtr->headSepLimit / 5);
- cb->row = row;
- cb->col = col;
- cb->next = NULL;
-
- /* insert this callback descriptor into the sorted list of
- * pending head-sep callbacks */
- p = reconCtrlPtr->headSepCBList;
- if (!p)
- reconCtrlPtr->headSepCBList = cb;
- else
- if (cb->callbackArg.v < p->callbackArg.v) {
- cb->next = reconCtrlPtr->headSepCBList;
- reconCtrlPtr->headSepCBList = cb;
- } else {
- for (pt = p, p = p->next; p && (p->callbackArg.v < cb->callbackArg.v); pt = p, p = p->next);
- cb->next = p;
- pt->next = cb;
- }
- retval = 1;
-#if RF_RECON_STATS > 0
- ctrl->reconCtrl->reconDesc->hsStallCount++;
-#endif /* RF_RECON_STATS > 0 */
- }
- RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
-
- return (retval);
-}
-/*
- * checks to see if reconstruction has been either forced or blocked
- * by a user operation. if forced, we skip this RU entirely. else if
- * blocked, put ourselves on the wait list. else return 0.
- *
- * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY
- */
-static int
-CheckForcedOrBlockedReconstruction(
- RF_Raid_t * raidPtr,
- RF_ReconParityStripeStatus_t * pssPtr,
- RF_PerDiskReconCtrl_t * ctrl,
- RF_RowCol_t row,
- RF_RowCol_t col,
- RF_StripeNum_t psid,
- RF_ReconUnitNum_t which_ru)
-{
- RF_CallbackDesc_t *cb;
- int retcode = 0;
-
- if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) || (pssPtr->flags & RF_PSS_FORCED_ON_WRITE))
- retcode = RF_PSS_FORCED_ON_WRITE;
- else
- if (pssPtr->flags & RF_PSS_RECON_BLOCKED) {
- Dprintf4("RECON: row %d col %d blocked at psid %ld ru %d\n", row, col, psid, which_ru);
- cb = rf_AllocCallbackDesc(); /* append ourselves to
- * the blockage-wait
- * list */
- cb->row = row;
- cb->col = col;
- cb->next = pssPtr->blockWaitList;
- pssPtr->blockWaitList = cb;
- retcode = RF_PSS_RECON_BLOCKED;
- }
- if (!retcode)
- pssPtr->flags |= RF_PSS_UNDER_RECON; /* mark this RU as under
- * reconstruction */
-
- return (retcode);
-}
-/*
- * if reconstruction is currently ongoing for the indicated stripeID,
- * reconstruction is forced to completion and we return non-zero to
- * indicate that the caller must wait. If not, then reconstruction is
- * blocked on the indicated stripe and the routine returns zero. If
- * and only if we return non-zero, we'll cause the cbFunc to get
- * invoked with the cbArg when the reconstruction has completed.
- */
-int
-rf_ForceOrBlockRecon(raidPtr, asmap, cbFunc, cbArg)
- RF_Raid_t *raidPtr;
- RF_AccessStripeMap_t *asmap;
- void (*cbFunc) (RF_Raid_t *, void *);
- void *cbArg;
-{
- RF_RowCol_t row = asmap->physInfo->row; /* which row of the array
- * we're working on */
- RF_StripeNum_t stripeID = asmap->stripeID; /* the stripe ID we're
- * forcing recon on */
- RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; /* num sects in one RU */
- RF_ReconParityStripeStatus_t *pssPtr; /* a pointer to the parity
- * stripe status structure */
- RF_StripeNum_t psid; /* parity stripe id */
- RF_SectorNum_t offset, fd_offset; /* disk offset, failed-disk
- * offset */
- RF_RowCol_t *diskids;
- RF_RowCol_t stripe;
- RF_ReconUnitNum_t which_ru; /* RU within parity stripe */
- RF_RowCol_t fcol, diskno, i;
- RF_ReconBuffer_t *new_rbuf; /* ptr to newly allocated rbufs */
- RF_DiskQueueData_t *req;/* disk I/O req to be enqueued */
- RF_CallbackDesc_t *cb;
- int created = 0, nPromoted;
-
- psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru);
-
- RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
-
- pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created);
-
- /* if recon is not ongoing on this PS, just return */
- if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
- RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
- return (0);
- }
- /* otherwise, we have to wait for reconstruction to complete on this
- * RU. */
- /* In order to avoid waiting for a potentially large number of
- * low-priority accesses to complete, we force a normal-priority (i.e.
- * not low-priority) reconstruction on this RU. */
- if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) && !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) {
- DDprintf1("Forcing recon on psid %ld\n", psid);
- pssPtr->flags |= RF_PSS_FORCED_ON_WRITE; /* mark this RU as under
- * forced recon */
- pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; /* clear the blockage
- * that we just set */
- fcol = raidPtr->reconControl[row]->fcol;
-
- /* get a listing of the disks comprising the indicated stripe */
- (raidPtr->Layout.map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &stripe);
- RF_ASSERT(row == stripe);
-
- /* For previously issued reads, elevate them to normal
- * priority. If the I/O has already completed, it won't be
- * found in the queue, and hence this will be a no-op. For
- * unissued reads, allocate buffers and issue new reads. The
- * fact that we've set the FORCED bit means that the regular
- * recon procs will not re-issue these reqs */
- for (i = 0; i < raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; i++)
- if ((diskno = diskids[i]) != fcol) {
- if (pssPtr->issued[diskno]) {
- nPromoted = rf_DiskIOPromote(&raidPtr->Queues[row][diskno], psid, which_ru);
- if (rf_reconDebug && nPromoted)
- printf("raid%d: promoted read from row %d col %d\n", raidPtr->raidid, row, diskno);
- } else {
- new_rbuf = rf_MakeReconBuffer(raidPtr, row, diskno, RF_RBUF_TYPE_FORCED); /* create new buf */
- ComputePSDiskOffsets(raidPtr, psid, row, diskno, &offset, &fd_offset,
- &new_rbuf->spRow, &new_rbuf->spCol, &new_rbuf->spOffset); /* find offsets & spare
- * location */
- new_rbuf->parityStripeID = psid; /* fill in the buffer */
- new_rbuf->which_ru = which_ru;
- new_rbuf->failedDiskSectorOffset = fd_offset;
- new_rbuf->priority = RF_IO_NORMAL_PRIORITY;
-
- /* use NULL b_proc b/c all addrs
- * should be in kernel space */
- req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, offset + which_ru * sectorsPerRU, sectorsPerRU, new_rbuf->buffer,
- psid, which_ru, (int (*) (void *, int)) ForceReconReadDoneProc, (void *) new_rbuf, NULL,
- NULL, (void *) raidPtr, 0, NULL);
-
- RF_ASSERT(req); /* XXX -- fix this --
- * XXX */
-
- new_rbuf->arg = req;
- rf_DiskIOEnqueue(&raidPtr->Queues[row][diskno], req, RF_IO_NORMAL_PRIORITY); /* enqueue the I/O */
- Dprintf3("raid%d: Issued new read req on row %d col %d\n", raidPtr->raidid, row, diskno);
- }
- }
- /* if the write is sitting in the disk queue, elevate its
- * priority */
- if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol], psid, which_ru))
- printf("raid%d: promoted write to row %d col %d\n",
- raidPtr->raidid, row, fcol);
- }
- /* install a callback descriptor to be invoked when recon completes on
- * this parity stripe. */
- cb = rf_AllocCallbackDesc();
- /* XXX the following is bogus.. These functions don't really match!!
- * GO */
- cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc;
- cb->callbackArg.p = (void *) cbArg;
- cb->next = pssPtr->procWaitList;
- pssPtr->procWaitList = cb;
- DDprintf2("raid%d: Waiting for forced recon on psid %ld\n",
- raidPtr->raidid, psid);
-
- RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
- return (1);
-}
-/* called upon the completion of a forced reconstruction read.
- * all we do is schedule the FORCEDREADONE event.
- * called at interrupt context in the kernel, so don't do anything illegal here.
- */
-static void
-ForceReconReadDoneProc(arg, status)
- void *arg;
- int status;
-{
- RF_ReconBuffer_t *rbuf = arg;
-
- if (status) {
- printf("Forced recon read failed!\n"); /* fprintf(stderr,"Forced
- * recon read
- * failed!\n"); */
- RF_PANIC();
- }
- rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREADDONE);
-}
-/* releases a block on the reconstruction of the indicated stripe */
-int
-rf_UnblockRecon(raidPtr, asmap)
- RF_Raid_t *raidPtr;
- RF_AccessStripeMap_t *asmap;
-{
- RF_RowCol_t row = asmap->origRow;
- RF_StripeNum_t stripeID = asmap->stripeID;
- RF_ReconParityStripeStatus_t *pssPtr;
- RF_ReconUnitNum_t which_ru;
- RF_StripeNum_t psid;
- int created = 0;
- RF_CallbackDesc_t *cb;
-
- psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru);
- RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
- pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_NONE, &created);
-
- /* When recon is forced, the pss desc can get deleted before we get
- * back to unblock recon. But, this can _only_ happen when recon is
- * forced. It would be good to put some kind of sanity check here, but
- * how to decide if recon was just forced or not? */
- if (!pssPtr) {
- /* printf("Warning: no pss descriptor upon unblock on psid %ld
- * RU %d\n",psid,which_ru); */
- if (rf_reconDebug || rf_pssDebug)
- printf("Warning: no pss descriptor upon unblock on psid %ld RU %d\n", (long) psid, which_ru);
- goto out;
- }
- pssPtr->blockCount--;
- Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d\n",
- raidPtr->raidid, psid, pssPtr->blockCount);
- if (pssPtr->blockCount == 0) { /* if recon blockage has been released */
-
- /* unblock recon before calling CauseReconEvent in case
- * CauseReconEvent causes us to try to issue a new read before
- * returning here. */
- pssPtr->flags &= ~RF_PSS_RECON_BLOCKED;
-
-
- while (pssPtr->blockWaitList) {
- /* spin through the block-wait list and
- release all the waiters */
- cb = pssPtr->blockWaitList;
- pssPtr->blockWaitList = cb->next;
- cb->next = NULL;
- rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL, RF_REVENT_BLOCKCLEAR);
- rf_FreeCallbackDesc(cb);
- }
- if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
- /* if no recon was requested while recon was blocked */
- rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr);
- }
- }
-out:
- RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
- return (0);
-}
diff --git a/sys/dev/raidframe/rf_reconstruct.h b/sys/dev/raidframe/rf_reconstruct.h
deleted file mode 100644
index 318d546..0000000
--- a/sys/dev/raidframe/rf_reconstruct.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_reconstruct.h,v 1.5 2000/05/28 00:48:30 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*********************************************************
- * rf_reconstruct.h -- header file for reconstruction code
- *********************************************************/
-
-#ifndef _RF__RF_RECONSTRUCT_H_
-#define _RF__RF_RECONSTRUCT_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <sys/time.h>
-#include <dev/raidframe/rf_reconmap.h>
-#include <dev/raidframe/rf_psstatus.h>
-
-/* reconstruction configuration information */
-struct RF_ReconConfig_s {
- unsigned numFloatingReconBufs; /* number of floating recon bufs to
- * use */
- RF_HeadSepLimit_t headSepLimit; /* how far apart the heads are allowed
- * to become, in parity stripes */
-};
-/* a reconstruction buffer */
-struct RF_ReconBuffer_s {
- RF_Raid_t *raidPtr; /* the array this buffer was created for (a typed
- * pointer; an older note called it a void *) */
- caddr_t buffer; /* points to the data */
- RF_StripeNum_t parityStripeID; /* the parity stripe that this data
- * relates to */
- int which_ru; /* which reconstruction unit within the PSS */
- RF_SectorNum_t failedDiskSectorOffset; /* the offset into the failed
- * disk */
- RF_RowCol_t row, col; /* which disk this buffer belongs to or is
- * targeted at */
- RF_StripeCount_t count; /* counts the # of SUs installed so far */
- int priority; /* used to force hi priority recon */
- RF_RbufType_t type; /* buffer class, e.g. EXCLUSIVE (one per
- * surviving disk), FLOATING or FORCED */
- char *arrived; /* [x] = 1/0 if SU from disk x has/hasn't
- * arrived; one byte per column */
- RF_ReconBuffer_t *next; /* used for buffer management */
- void *arg; /* generic field for general use */
- RF_RowCol_t spRow, spCol; /* spare disk to which this buf should
- * be written */
- /* if dist sparing off, always identifies the replacement disk */
- RF_SectorNum_t spOffset;/* offset into the spare disk */
- /* if dist sparing off, identical to failedDiskSectorOffset */
- RF_ReconParityStripeStatus_t *pssPtr; /* debug- pss associated with
- * issue-pending write */
-};
-/* a reconstruction event descriptor. The event types currently are:
- * RF_REVENT_READDONE -- a read operation has completed
- * RF_REVENT_WRITEDONE -- a write operation has completed
- * RF_REVENT_BUFREADY -- the buffer manager has produced a full buffer
- * RF_REVENT_BLOCKCLEAR -- a reconstruction blockage has been cleared
- * RF_REVENT_BUFCLEAR -- the buffer manager has released a process blocked on submission
- * RF_REVENT_HEADSEPCLEAR -- presumably: a head-separation blockage has been cleared (TODO confirm)
- * RF_REVENT_SKIP -- we need to skip the current RU and go on to the next one, typ. b/c we found recon forced
- * RF_REVENT_FORCEDREADDONE -- a forced-reconstruction read operation has completed
- */
-typedef enum RF_Revent_e {
- RF_REVENT_READDONE,
- RF_REVENT_WRITEDONE,
- RF_REVENT_BUFREADY,
- RF_REVENT_BLOCKCLEAR,
- RF_REVENT_BUFCLEAR,
- RF_REVENT_HEADSEPCLEAR,
- RF_REVENT_SKIP,
- RF_REVENT_FORCEDREADDONE
-} RF_Revent_t;
-
-/* one queued reconstruction event; see RF_Revent_t for the event types */
-struct RF_ReconEvent_s {
- RF_Revent_t type; /* what kind of event has occurred */
- RF_RowCol_t col; /* row ID is implicit in the queue in which
- * the event is placed */
- void *arg; /* a generic argument */
- RF_ReconEvent_t *next; /* link used by the event queue and by the
- * event-descriptor freelist */
-};
-/*
- * Reconstruction control information maintained per-disk
- * (for surviving disks)
- */
-struct RF_PerDiskReconCtrl_s {
- RF_ReconCtrl_t *reconCtrl; /* back-pointer to the control structure
- * that contains this entry */
- RF_RowCol_t row, col; /* to make this structure self-identifying */
- RF_StripeNum_t curPSID; /* the next parity stripe ID to check on this
- * disk */
- RF_HeadSepLimit_t headSepCounter; /* counter used to control
- * maximum head separation */
- RF_SectorNum_t diskOffset; /* the offset into the indicated disk
- * of the current PU */
- RF_ReconUnitNum_t ru_count; /* this counts off the recon units
- * within each parity unit */
- RF_ReconBuffer_t *rbuf; /* the recon buffer assigned to this disk
- * (NULL for the failed column) */
-};
-/* main reconstruction control structure */
-struct RF_ReconCtrl_s {
- RF_RaidReconDesc_t *reconDesc; /* descriptor this control structure
- * was built from */
- RF_RowCol_t fcol; /* which column has failed */
- RF_PerDiskReconCtrl_t *perDiskInfo; /* information maintained
- * per-disk; one entry per column */
- RF_ReconMap_t *reconMap;/* map of what has/has not been reconstructed */
- RF_RowCol_t spareRow; /* which of the spare disks we're using */
- RF_RowCol_t spareCol;
- RF_StripeNum_t lastPSID;/* the ID of the last parity stripe we want
- * reconstructed */
- int percentComplete;/* percentage completion of reconstruction */
- int numRUsComplete; /* number of Reconstruction Units done */
- int numRUsTotal; /* total number of Reconstruction Units */
-
- /* reconstruction event queue */
- RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction
- * events */
- RF_DECLARE_MUTEX(eq_mutex) /* mutex for locking event
- * queue */
- RF_DECLARE_COND(eq_cond) /* condition variable for
- * signalling recon events */
- int eq_count; /* debug only; number of events on
- * eventQueue */
-
- /* reconstruction buffer management */
- RF_DECLARE_MUTEX(rb_mutex) /* mutex for messing around
- * with recon buffers */
- RF_ReconBuffer_t *floatingRbufs; /* available floating
- * reconstruction buffers */
- RF_ReconBuffer_t *committedRbufs; /* recon buffers that have
- * been committed to some
- * waiting disk */
- RF_ReconBuffer_t *fullBufferList; /* full buffers waiting to be
- * written out */
- RF_ReconBuffer_t *priorityList; /* full buffers that have been
- * elevated to higher priority */
- RF_CallbackDesc_t *bufferWaitList; /* disks that are currently
- * blocked waiting for buffers */
-
- /* parity stripe status table */
- RF_PSStatusHeader_t *pssTable; /* stores the reconstruction status of
- * active parity stripes */
-
- /* maximum-head separation control */
- RF_HeadSepLimit_t minHeadSepCounter; /* the minimum hs counter over
- * all disks */
- RF_CallbackDesc_t *headSepCBList; /* list of callbacks to be
- * done as minPSID advances */
-
- /* performance monitoring */
- struct timeval starttime; /* recon start time */
-
- void (*continueFunc) (void *); /* function to call when io
- * returns */
- void *continueArg; /* argument for Func */
-};
-/* the default priority for reconstruction accesses */
-#define RF_IO_RECON_PRIORITY RF_IO_LOW_PRIORITY
-
-int rf_ConfigureReconstruction(RF_ShutdownList_t ** listp);
-
-int
-rf_ReconstructFailedDisk(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_RowCol_t col);
-
-int
-rf_ReconstructFailedDiskBasic(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_RowCol_t col);
-
-int
-rf_ReconstructInPlace(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col);
-
-int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t * reconDesc);
-
-int
-rf_ForceOrBlockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap,
- void (*cbFunc) (RF_Raid_t *, void *), void *cbArg);
-
- int rf_UnblockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap);
-
- int rf_RegisterReconDoneProc(RF_Raid_t * raidPtr, void (*proc) (RF_Raid_t *, void *), void *arg,
- RF_ReconDoneProc_t ** handlep);
-
-#endif /* !_RF__RF_RECONSTRUCT_H_ */
diff --git a/sys/dev/raidframe/rf_reconutil.c b/sys/dev/raidframe/rf_reconutil.c
deleted file mode 100644
index bafff69..0000000
--- a/sys/dev/raidframe/rf_reconutil.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/* $NetBSD: rf_reconutil.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/********************************************
- * rf_reconutil.c -- reconstruction utilities
- ********************************************/
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_reconutil.h>
-#include <dev/raidframe/rf_reconbuffer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_decluster.h>
-#include <dev/raidframe/rf_raid5_rotatedspare.h>
-#include <dev/raidframe/rf_interdecluster.h>
-#include <dev/raidframe/rf_chaindecluster.h>
-
-/*******************************************************************
- * allocates/frees the reconstruction control information structures
- *******************************************************************/
-
-/*
- * rf_MakeReconControl -- build the reconstruction control structure for
- * one failed disk.
- *
- *	reconDesc	reconstruction descriptor; supplies the raidPtr
- *	frow, fcol	row and column of the failed disk
- *	srow, scol	row and column of the spare being used
- *
- * Returns the new control structure, or NULL when a mutex or condition
- * variable cannot be initialized.  On that failure path everything that
- * was allocated here is released again instead of being leaked.  (A
- * spare table installed by rf_InstallSpareTable is left in place.)
- */
-RF_ReconCtrl_t *
-rf_MakeReconControl(reconDesc, frow, fcol, srow, scol)
-	RF_RaidReconDesc_t *reconDesc;
-	RF_RowCol_t frow;	/* failed row and column */
-	RF_RowCol_t fcol;
-	RF_RowCol_t srow;	/* identifies which spare we're using */
-	RF_RowCol_t scol;
-{
-	RF_Raid_t *raidPtr = reconDesc->raidPtr;
-	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
-	RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU;
-	RF_ReconUnitCount_t numSpareRUs;
-	RF_ReconCtrl_t *reconCtrlPtr;
-	RF_ReconBuffer_t *rbuf;
-	RF_LayoutSW_t *lp;
-	int retcode, rc;
-	RF_RowCol_t i;
-
-	lp = raidPtr->Layout.map;
-
-	/* make and zero the global reconstruction structure and the per-disk
-	 * structure */
-	RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *));
-	RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *));	/* this zeros it */
-	reconCtrlPtr->reconDesc = reconDesc;
-	reconCtrlPtr->fcol = fcol;
-	reconCtrlPtr->spareRow = srow;
-	reconCtrlPtr->spareCol = scol;
-	reconCtrlPtr->lastPSID = layoutPtr->numStripe / layoutPtr->SUsPerPU;
-	reconCtrlPtr->percentComplete = 0;
-
-	/* initialize each per-disk recon information structure */
-	for (i = 0; i < raidPtr->numCol; i++) {
-		reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr;
-		reconCtrlPtr->perDiskInfo[i].row = frow;
-		reconCtrlPtr->perDiskInfo[i].col = i;
-		reconCtrlPtr->perDiskInfo[i].curPSID = -1;	/* make it appear as if
-								 * we just finished an
-								 * RU */
-		reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU - 1;
-	}
-
-	/* Get the number of spare units per disk and the sparemap in case
-	 * spare is distributed */
-
-	if (lp->GetNumSpareRUs) {
-		numSpareRUs = lp->GetNumSpareRUs(raidPtr);
-	} else {
-		numSpareRUs = 0;
-	}
-
-	/*
-	 * Not all distributed sparing archs need dynamic mappings
-	 */
-	if (lp->InstallSpareTable) {
-		retcode = rf_InstallSpareTable(raidPtr, frow, fcol);
-		if (retcode) {
-			RF_PANIC();	/* XXX fix this */
-		}
-	}
-	/* make the reconstruction map */
-	reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit),
-	    raidPtr->sectorsPerDisk, numSpareRUs);
-
-	/* make the per-disk reconstruction buffers; the failed column gets
-	 * none */
-	for (i = 0; i < raidPtr->numCol; i++) {
-		reconCtrlPtr->perDiskInfo[i].rbuf = (i == fcol) ? NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE);
-	}
-
-	/* initialize the event queue */
-	rc = rf_mutex_init(&reconCtrlPtr->eq_mutex, __FUNCTION__);
-	if (rc) {
-		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
-		    __LINE__, rc);
-		goto fail_eq_mutex;
-	}
-	rc = rf_cond_init(&reconCtrlPtr->eq_cond);
-	if (rc) {
-		RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
-		    __LINE__, rc);
-		goto fail_eq_cond;
-	}
-	reconCtrlPtr->eventQueue = NULL;
-	reconCtrlPtr->eq_count = 0;
-
-	/* make the floating recon buffers and append them to the free list */
-	rc = rf_mutex_init(&reconCtrlPtr->rb_mutex, __FUNCTION__);
-	if (rc) {
-		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
-		    __LINE__, rc);
-		goto fail_rb_mutex;
-	}
-	reconCtrlPtr->fullBufferList = NULL;
-	reconCtrlPtr->priorityList = NULL;
-	reconCtrlPtr->floatingRbufs = NULL;
-	reconCtrlPtr->committedRbufs = NULL;
-	for (i = 0; i < raidPtr->numFloatingReconBufs; i++) {
-		rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING);
-		rbuf->next = reconCtrlPtr->floatingRbufs;
-		reconCtrlPtr->floatingRbufs = rbuf;
-	}
-
-	/* create the parity stripe status table */
-	reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr);
-
-	/* set the initial min head sep counter val */
-	reconCtrlPtr->minHeadSepCounter = 0;
-
-	return (reconCtrlPtr);
-
-	/*
-	 * Error unwinding: each label releases what had been set up before
-	 * the step that failed, in reverse order of construction.
-	 */
-fail_rb_mutex:
-	rf_cond_destroy(&reconCtrlPtr->eq_cond);
-fail_eq_cond:
-	rf_mutex_destroy(&reconCtrlPtr->eq_mutex);
-fail_eq_mutex:
-	rf_FreeReconMap(reconCtrlPtr->reconMap);
-	for (i = 0; i < raidPtr->numCol; i++) {
-		if (reconCtrlPtr->perDiskInfo[i].rbuf)
-			rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf);
-	}
-	RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t));
-	RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr));
-	return (NULL);
-}
-
-/*
- * Tear down the reconstruction control structure attached to `row':
- * release the per-disk buffers, the floating buffers still on the free
- * list, the locks, the recon map, the parity stripe status table and
- * finally the structure itself.
- */
-void
-rf_FreeReconControl(RF_Raid_t *raidPtr, RF_RowCol_t row)
-{
-	RF_ReconCtrl_t *ctrl = raidPtr->reconControl[row];
-	RF_ReconBuffer_t *head;
-	RF_ReconUnitNum_t n;
-
-	RF_ASSERT(ctrl);
-
-	/* per-disk buffers; the failed column never had one */
-	for (n = 0; n < raidPtr->numCol; n++) {
-		if (ctrl->perDiskInfo[n].rbuf)
-			rf_FreeReconBuffer(ctrl->perDiskInfo[n].rbuf);
-	}
-
-	/* every floating buffer is expected back on the free list by now */
-	for (n = 0; n < raidPtr->numFloatingReconBufs; n++) {
-		head = ctrl->floatingRbufs;
-		RF_ASSERT(head);
-		ctrl->floatingRbufs = head->next;
-		rf_FreeReconBuffer(head);
-	}
-
-	rf_mutex_destroy(&ctrl->rb_mutex);
-	rf_mutex_destroy(&ctrl->eq_mutex);
-	rf_cond_destroy(&ctrl->eq_cond);
-	rf_FreeReconMap(ctrl->reconMap);
-	rf_FreeParityStripeStatusTable(raidPtr, ctrl->pssTable);
-	RF_Free(ctrl->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t));
-	RF_Free(ctrl, sizeof(*ctrl));
-}
-
-
-/******************************************************************************
- * computes the default head separation limit
- *
- * Asks the layout's GetDefaultHeadSepLimit hook; returns -1 when the layout
- * does not provide one.
- *****************************************************************************/
-RF_HeadSepLimit_t
-rf_GetDefaultHeadSepLimit(RF_Raid_t *raidPtr)
-{
-	RF_LayoutSW_t *layout = raidPtr->Layout.map;
-	RF_HeadSepLimit_t limit;
-
-	if (layout->GetDefaultHeadSepLimit != NULL) {
-		limit = layout->GetDefaultHeadSepLimit(raidPtr);
-		return (limit);
-	}
-	return (-1);
-}
-
-
-/******************************************************************************
- * computes the default number of floating recon buffers
- *
- * Asks the layout's GetDefaultNumFloatingReconBuffers hook; layouts without
- * one get three buffers per column.
- *****************************************************************************/
-int
-rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t *raidPtr)
-{
-	RF_LayoutSW_t *layout = raidPtr->Layout.map;
-	int count;
-
-	if (layout->GetDefaultNumFloatingReconBuffers != NULL) {
-		count = layout->GetDefaultNumFloatingReconBuffers(raidPtr);
-		return (count);
-	}
-	return (3 * raidPtr->numCol);
-}
-
-
-/******************************************************************************
- * creates and initializes a reconstruction buffer
- *
- * The data area holds one reconstruction unit; the `arrived' array has one
- * byte per column.  The buffer is tagged with the given disk (row, col) and
- * buffer type and starts out unlinked, at the default recon priority.
- *****************************************************************************/
-RF_ReconBuffer_t *
-rf_MakeReconBuffer(
-    RF_Raid_t * raidPtr,
-    RF_RowCol_t row,
-    RF_RowCol_t col,
-    RF_RbufType_t type)
-{
-	RF_RaidLayout_t *layout = &raidPtr->Layout;
-	u_int nbytes = rf_RaidAddressToByte(raidPtr, layout->SUsPerRU * layout->sectorsPerStripeUnit);
-	RF_ReconBuffer_t *rbuf;
-
-	/* descriptor first, then the two areas hanging off it */
-	RF_Malloc(rbuf, sizeof(RF_ReconBuffer_t), (RF_ReconBuffer_t *));
-	RF_Malloc(rbuf->buffer, nbytes, (caddr_t));
-	RF_Malloc(rbuf->arrived, raidPtr->numCol * sizeof(char), (char *));
-
-	rbuf->raidPtr = raidPtr;
-	rbuf->row = row;
-	rbuf->col = col;
-	rbuf->type = type;
-	rbuf->priority = RF_IO_RECON_PRIORITY;
-	rbuf->next = NULL;
-	rbuf->pssPtr = NULL;
-
-	return (rbuf);
-}
-/******************************************************************************
- * frees a reconstruction buffer
- *
- * Releases the `arrived' array, the data area (sized as one reconstruction
- * unit, the same way rf_MakeReconBuffer sizes it) and the descriptor.
- *****************************************************************************/
-void
-rf_FreeReconBuffer(RF_ReconBuffer_t *rbuf)
-{
-	RF_Raid_t *raid = rbuf->raidPtr;
-	RF_RaidLayout_t *layout = &raid->Layout;
-	u_int nbytes;
-
-	nbytes = rf_RaidAddressToByte(raid, layout->SUsPerRU * layout->sectorsPerStripeUnit);
-
-	RF_Free(rbuf->arrived, raid->numCol * sizeof(char));
-	RF_Free(rbuf->buffer, nbytes);
-	RF_Free(rbuf, sizeof(*rbuf));
-}
-
-
-/* counts the buffers of type RF_RBUF_TYPE_FLOATING on a `next'-linked list */
-static int
-rf_CountFloatingRbufs(RF_ReconBuffer_t *list)
-{
-	RF_ReconBuffer_t *rbuf;
-	int n = 0;
-
-	for (rbuf = list; rbuf; rbuf = rbuf->next) {
-		if (rbuf->type == RF_RBUF_TYPE_FLOATING)
-			n++;
-	}
-	return (n);
-}
-
-/******************************************************************************
- * debug only: sanity check the number of floating recon bufs in use
- *
- * Uses the first row that has a reconstruction control structure.  Counts the
- * floating buffers referenced from the parity stripe status table and from the
- * floating, committed, full and priority lists, and asserts that the total
- * equals raidPtr->numFloatingReconBufs.
- *
- *	dolock	non-zero: take rb_mutex around the check
- *
- * If no row is under reconstruction the assertion fires; when assertions are
- * compiled out the function simply returns instead of dereferencing a NULL
- * control pointer.
- *****************************************************************************/
-void
-rf_CheckFloatingRbufCount(raidPtr, dolock)
-	RF_Raid_t *raidPtr;
-	int dolock;
-{
-	RF_ReconParityStripeStatus_t *p;
-	RF_PSStatusHeader_t *pssTable;
-	RF_ReconCtrl_t *rctrl = NULL;
-	RF_ReconBuffer_t *rbuf;
-	int i, j, sum = 0;
-
-	/* find the first row with reconstruction in progress */
-	for (i = 0; i < raidPtr->numRow; i++)
-		if (raidPtr->reconControl[i]) {
-			rctrl = raidPtr->reconControl[i];
-			break;
-		}
-	RF_ASSERT(rctrl != NULL);
-	if (rctrl == NULL)
-		return;
-
-	if (dolock)
-		RF_LOCK_MUTEX(rctrl->rb_mutex);
-	pssTable = rctrl->pssTable;
-
-	/* buffers currently attached to parity stripe status entries */
-	for (i = 0; i < raidPtr->pssTableSize; i++) {
-		RF_LOCK_MUTEX(pssTable[i].mutex);
-		for (p = pssTable[i].chain; p; p = p->next) {
-			rbuf = (RF_ReconBuffer_t *) p->rbuf;
-			if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING)
-				sum++;
-
-			rbuf = (RF_ReconBuffer_t *) p->writeRbuf;
-			if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING)
-				sum++;
-
-			for (j = 0; j < p->xorBufCount; j++) {
-				rbuf = (RF_ReconBuffer_t *) p->rbufsForXor[j];
-				RF_ASSERT(rbuf);
-				if (rbuf->type == RF_RBUF_TYPE_FLOATING)
-					sum++;
-			}
-		}
-		RF_UNLOCK_MUTEX(pssTable[i].mutex);
-	}
-
-	/* buffers sitting on the control structure's own lists */
-	sum += rf_CountFloatingRbufs(rctrl->floatingRbufs);
-	sum += rf_CountFloatingRbufs(rctrl->committedRbufs);
-	sum += rf_CountFloatingRbufs(rctrl->fullBufferList);
-	sum += rf_CountFloatingRbufs(rctrl->priorityList);
-
-	RF_ASSERT(sum == raidPtr->numFloatingReconBufs);
-
-	if (dolock)
-		RF_UNLOCK_MUTEX(rctrl->rb_mutex);
-}
diff --git a/sys/dev/raidframe/rf_reconutil.h b/sys/dev/raidframe/rf_reconutil.h
deleted file mode 100644
index 744d7b9..0000000
--- a/sys/dev/raidframe/rf_reconutil.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_reconutil.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/************************************************************
- * rf_reconutil.h -- header file for reconstruction utilities
- ************************************************************/
-
-#ifndef _RF__RF_RECONUTIL_H_
-#define _RF__RF_RECONUTIL_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_reconstruct.h>
-
-RF_ReconCtrl_t *
-rf_MakeReconControl(RF_RaidReconDesc_t * reconDesc,
- RF_RowCol_t frow, RF_RowCol_t fcol, RF_RowCol_t srow, RF_RowCol_t scol);
-void rf_FreeReconControl(RF_Raid_t * raidPtr, RF_RowCol_t row);
-RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(RF_Raid_t * raidPtr);
-int rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t * raidPtr);
-RF_ReconBuffer_t *
-rf_MakeReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row,
- RF_RowCol_t col, RF_RbufType_t type);
-void rf_FreeReconBuffer(RF_ReconBuffer_t * rbuf);
-void rf_CheckFloatingRbufCount(RF_Raid_t * raidPtr, int dolock);
-
-#endif /* !_RF__RF_RECONUTIL_H_ */
diff --git a/sys/dev/raidframe/rf_revent.c b/sys/dev/raidframe/rf_revent.c
deleted file mode 100644
index fcdf82e..0000000
--- a/sys/dev/raidframe/rf_revent.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/* $NetBSD: rf_revent.c,v 1.9 2000/09/21 01:45:46 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author:
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/*
- * revent.c -- reconstruction event handling code
- */
-
-#include <sys/errno.h>
-
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_revent.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_shutdown.h>
-#include <dev/raidframe/rf_kintf.h>
-
-/* freelist of RF_ReconEvent_t descriptors, created in rf_ConfigureReconEvent */
-static RF_FreeList_t *rf_revent_freelist;
-/* tunables handed to RF_FREELIST_CREATE / RF_FREELIST_PRIME; judging by the
- * names: cap on cached free entries, growth increment, and initial fill
- * (TODO confirm against rf_freelist.h) */
-#define RF_MAX_FREE_REVENT 128
-#define RF_REVENT_INC 8
-#define RF_REVENT_INITIAL 8
-
-
-
-#include <sys/proc.h>
-#include <sys/kernel.h>
-
-/* sleep on the address of the event queue, handing eq_mutex to RF_LTSLEEP;
- * the timeout argument is 0 (presumably "no timeout" -- confirm against
- * RF_LTSLEEP) */
-#define DO_WAIT(_rc) \
- RF_LTSLEEP(&(_rc)->eventQueue, PRIBIO, "raidframe eventq", \
- 0, &((_rc)->eq_mutex))
-
-/* wake up whoever is sleeping in DO_WAIT on the same event queue address */
-#define DO_SIGNAL(_rc) wakeup(&(_rc)->eventQueue)
-
-
-static void rf_ShutdownReconEvent(void *);
-
-static RF_ReconEvent_t *
-GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col,
- void *arg, RF_Revent_t type);
-
-/*
- * Shutdown-list hook: destroys the recon event descriptor freelist.
- * The argument is not used.
- */
-static void
-rf_ShutdownReconEvent(void *ignored)
-{
-	RF_FREELIST_DESTROY(rf_revent_freelist, next, (RF_ReconEvent_t *));
-}
-
-/*
- * Creates the recon event descriptor freelist, registers its destructor
- * on the shutdown list and primes the freelist.
- *
- * Returns 0 on success, ENOMEM if the freelist cannot be created, or the
- * error from rf_ShutdownCreate (after destroying the freelist again).
- */
-int
-rf_ConfigureReconEvent(RF_ShutdownList_t **listp)
-{
-	int error;
-
-	RF_FREELIST_CREATE(rf_revent_freelist, RF_MAX_FREE_REVENT,
-	    RF_REVENT_INC, sizeof(RF_ReconEvent_t));
-	if (rf_revent_freelist == NULL)
-		return (ENOMEM);
-
-	error = rf_ShutdownCreate(listp, rf_ShutdownReconEvent, NULL);
-	if (error != 0) {
-		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
-		    __LINE__, error);
-		/* nobody else will clean up for us; do it by hand */
-		rf_ShutdownReconEvent(NULL);
-		return (error);
-	}
-
-	RF_FREELIST_PRIME(rf_revent_freelist, RF_REVENT_INITIAL, next,
-	    (RF_ReconEvent_t *));
-	return (0);
-}
-
-/* returns the next reconstruction event for the given row, blocking the
- * calling thread (DO_WAIT) until one becomes available.  continueFunc and
- * continueArg are recorded in the row's control structure.  As written this
- * routine never returns NULL. */
-
-RF_ReconEvent_t *
-rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg)
-	RF_RaidReconDesc_t *reconDesc;
-	RF_RowCol_t row;
-	void (*continueFunc) (void *);
-	void *continueArg;
-{
-	RF_Raid_t *raidPtr = reconDesc->raidPtr;
-	RF_ReconCtrl_t *rctrl;
-	RF_ReconEvent_t *event;
-
-	/* valid rows are 0 .. numRow-1; validate before indexing
-	 * reconControl[] */
-	RF_ASSERT(row >= 0 && row < raidPtr->numRow);
-	rctrl = raidPtr->reconControl[row];
-
-	RF_LOCK_MUTEX(rctrl->eq_mutex);
-	/* q null and count==0 must be equivalent conditions */
-	RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0));
-
-	rctrl->continueFunc = continueFunc;
-	rctrl->continueArg = continueArg;
-
-
-	/* mpsleep timeout value: secs = timo_val/hz.  'ticks' here is
-	   defined as cycle-counter ticks, not softclock ticks */
-
-#define MAX_RECON_EXEC_USECS (100 * 1000)  /* 100 ms */
-#define RECON_DELAY_MS 25
-#define RECON_TIMO ((RECON_DELAY_MS * hz) / 1000)
-
-	/* we are not pre-emptible in the kernel, but we don't want to run
-	 * forever.  If we run w/o blocking for more than
-	 * MAX_RECON_EXEC_USECS microseconds, delay for RECON_DELAY_MS before
-	 * continuing.  this may murder us with context switches, so we may
-	 * need to increase both MAX_RECON_EXEC_USECS and RECON_DELAY_MS. */
-	if (reconDesc->reconExecTimerRunning) {
-		int status;
-
-		RF_ETIMER_STOP(reconDesc->recon_exec_timer);
-		RF_ETIMER_EVAL(reconDesc->recon_exec_timer);
-		reconDesc->reconExecTicks +=
-		    RF_ETIMER_VAL_US(reconDesc->recon_exec_timer);
-		if (reconDesc->reconExecTicks > reconDesc->maxReconExecTicks)
-			reconDesc->maxReconExecTicks =
-			    reconDesc->reconExecTicks;
-		if (reconDesc->reconExecTicks >= MAX_RECON_EXEC_USECS) {
-			/* we've been running too long.  delay for
-			 * RECON_DELAY_MS */
-#if RF_RECON_STATS > 0
-			reconDesc->numReconExecDelays++;
-#endif				/* RF_RECON_STATS > 0 */
-
-			status = RF_LTSLEEP(&reconDesc->reconExecTicks, PRIBIO,
-			    "recon delay", RECON_TIMO,
-			    &rctrl->eq_mutex);
-			RF_ASSERT(status == EWOULDBLOCK);
-			reconDesc->reconExecTicks = 0;
-		}
-	}
-	/* block until somebody queues an event */
-	while (!rctrl->eventQueue) {
-#if RF_RECON_STATS > 0
-		reconDesc->numReconEventWaits++;
-#endif				/* RF_RECON_STATS > 0 */
-		DO_WAIT(rctrl);
-		reconDesc->reconExecTicks = 0;	/* we've just waited */
-	}
-
-	reconDesc->reconExecTimerRunning = 1;
-	if (RF_ETIMER_VAL_US(reconDesc->recon_exec_timer)!=0) {
-		/* it moved!!  reset the timer. */
-		RF_ETIMER_START(reconDesc->recon_exec_timer);
-	}
-	/* unlink the event at the head of the queue */
-	event = rctrl->eventQueue;
-	rctrl->eventQueue = event->next;
-	event->next = NULL;
-	rctrl->eq_count--;
-
-	/* q null and count==0 must be equivalent conditions */
-	RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0));
-	RF_UNLOCK_MUTEX(rctrl->eq_mutex);
-	return (event);
-}
-/* enqueues a reconstruction event on the indicated queue
- *
- *	raidPtr		the array
- *	row, col	disk the event refers to; row selects the queue
- *	arg		generic argument stored in the event
- *	type		event type
- *
- * The event is pushed on the head of the row's event queue and any thread
- * sleeping in rf_GetNextReconEvent is woken.  If no event descriptor can be
- * obtained an error is logged and the event is dropped rather than
- * dereferencing a NULL descriptor. */
-void
-rf_CauseReconEvent(raidPtr, row, col, arg, type)
-	RF_Raid_t *raidPtr;
-	RF_RowCol_t row;
-	RF_RowCol_t col;
-	void *arg;
-	RF_Revent_t type;
-{
-	RF_ReconCtrl_t *rctrl;
-	RF_ReconEvent_t *event;
-
-	/* valid indices are 0 .. num-1; validate before indexing
-	 * reconControl[] */
-	RF_ASSERT(row >= 0 && row < raidPtr->numRow && col >= 0 && col < raidPtr->numCol);
-	rctrl = raidPtr->reconControl[row];
-
-	if (type == RF_REVENT_BUFCLEAR) {
-		RF_ASSERT(col != rctrl->fcol);
-	}
-
-	event = GetReconEventDesc(row, col, arg, type);
-	if (event == NULL) {
-		/* GetReconEventDesc returns NULL when the freelist is empty
-		 * and cannot grow */
-		RF_ERRORMSG3("Unable to allocate recon event file %s line %d type=%d\n", __FILE__,
-		    __LINE__, (int) type);
-		return;
-	}
-
-	RF_LOCK_MUTEX(rctrl->eq_mutex);
-	/* q null and count==0 must be equivalent conditions */
-	RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0));
-	event->next = rctrl->eventQueue;
-	rctrl->eventQueue = event;
-	rctrl->eq_count++;
-	RF_UNLOCK_MUTEX(rctrl->eq_mutex);
-
-	DO_SIGNAL(rctrl);
-}
-/* takes an event descriptor off the freelist and fills in its column,
- * argument and type.  `row' is accepted but not stored (the row is implied
- * by the queue the event goes on).  Returns NULL if no descriptor could be
- * obtained. */
-static RF_ReconEvent_t *
-GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col, void *arg, RF_Revent_t type)
-{
-	RF_ReconEvent_t *ev;
-
-	RF_FREELIST_GET(rf_revent_freelist, ev, next, (RF_ReconEvent_t *));
-	if (ev != NULL) {
-		ev->type = type;
-		ev->arg = arg;
-		ev->col = col;
-	}
-	return (ev);
-}
-
-/* hands an event descriptor back to the recon event freelist */
-void
-rf_FreeReconEventDesc(RF_ReconEvent_t *event)
-{
-	RF_FREELIST_FREE(rf_revent_freelist, event, next);
-}
diff --git a/sys/dev/raidframe/rf_revent.h b/sys/dev/raidframe/rf_revent.h
deleted file mode 100644
index 51c3202..0000000
--- a/sys/dev/raidframe/rf_revent.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_revent.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author:
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*******************************************************************
- *
- * rf_revent.h -- header file for reconstruction event handling code
- *
- *******************************************************************/
-
-#ifndef _RF__RF_REVENT_H_
-#define _RF__RF_REVENT_H_
-
-#include <dev/raidframe/rf_types.h>
-
-int rf_ConfigureReconEvent(RF_ShutdownList_t ** listp);
-
-RF_ReconEvent_t *
-rf_GetNextReconEvent(RF_RaidReconDesc_t * reconDesc,
- RF_RowCol_t row, void (*continueFunc) (void *), void *continueArg);
-
- void rf_CauseReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col,
- void *arg, RF_Revent_t type);
-
- void rf_FreeReconEventDesc(RF_ReconEvent_t * event);
-
-#endif /* !_RF__RF_REVENT_H_ */
diff --git a/sys/dev/raidframe/rf_shutdown.c b/sys/dev/raidframe/rf_shutdown.c
deleted file mode 100644
index e6b5292..0000000
--- a/sys/dev/raidframe/rf_shutdown.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* $NetBSD: rf_shutdown.c,v 1.6 2000/01/13 23:41:18 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * rf_shutdown.c
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/*
- * Maintain lists of cleanup functions. Also, mechanisms for coordinating
- * thread startup and shutdown.
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_shutdown.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_freelist.h>
-
-static void
-rf_FreeShutdownEnt(RF_ShutdownList_t * ent)
-{
- FREE(ent, M_RAIDFRAME);
-}
-
-int
-_rf_ShutdownCreate(
- RF_ShutdownList_t ** listp,
- void (*cleanup) (void *arg),
- void *arg,
- char *file,
- int line)
-{
- RF_ShutdownList_t *ent;
-
- /*
- * Have to directly allocate memory here, since we start up before
- * and shutdown after RAIDframe internal allocation system.
- */
- /* ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t),
- M_RAIDFRAME, M_WAITOK); */
- ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t),
- M_RAIDFRAME, M_NOWAIT);
- if (ent == NULL)
- return (ENOMEM);
- ent->cleanup = cleanup;
- ent->arg = arg;
- ent->file = file;
- ent->line = line;
- ent->next = *listp;
- *listp = ent;
- return (0);
-}
-
-int
-rf_ShutdownList(RF_ShutdownList_t ** list)
-{
- RF_ShutdownList_t *r, *next;
- char *file;
- int line;
-
- for (r = *list; r; r = next) {
- next = r->next;
- file = r->file;
- line = r->line;
-
- if (rf_shutdownDebug) {
- printf("call shutdown, created %s:%d\n", file, line);
- }
- r->cleanup(r->arg);
-
- if (rf_shutdownDebug) {
- printf("completed shutdown, created %s:%d\n", file, line);
- }
- rf_FreeShutdownEnt(r);
- }
- *list = NULL;
- return (0);
-}
diff --git a/sys/dev/raidframe/rf_shutdown.h b/sys/dev/raidframe/rf_shutdown.h
deleted file mode 100644
index 5abc5ba..0000000
--- a/sys/dev/raidframe/rf_shutdown.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_shutdown.h,v 1.2 1999/02/05 00:06:17 oster Exp $ */
-/*
- * rf_shutdown.h
- */
-/*
- * Copyright (c) 1996 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/*
- * Maintain lists of cleanup functions. Also, mechanisms for coordinating
- * thread startup and shutdown.
- */
-
-#ifndef _RF__RF_SHUTDOWN_H_
-#define _RF__RF_SHUTDOWN_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-
-/*
- * Important note: the shutdown list is run like a stack, new
- * entries pushed on top. Therefore, the most recently added
- * entry (last started) is the first removed (stopped). This
- * should handle system-dependencies pretty nicely- if a system
- * is there when you start another, it'll be there when you
- * shut down another. Hopefully, this subsystem will remove
- * more complexity than it introduces.
- */
-
-struct RF_ShutdownList_s {
- void (*cleanup) (void *arg);
- void *arg;
- char *file;
- int line;
- RF_ShutdownList_t *next;
-};
-#define rf_ShutdownCreate(_listp_,_func_,_arg_) \
- _rf_ShutdownCreate(_listp_,_func_,_arg_,__FILE__,__LINE__)
-
-int _rf_ShutdownCreate(RF_ShutdownList_t ** listp, void (*cleanup) (void *arg),
- void *arg, char *file, int line);
-int rf_ShutdownList(RF_ShutdownList_t ** listp);
-
-#endif /* !_RF__RF_SHUTDOWN_H_ */
diff --git a/sys/dev/raidframe/rf_sstf.c b/sys/dev/raidframe/rf_sstf.c
deleted file mode 100644
index cd9ea56..0000000
--- a/sys/dev/raidframe/rf_sstf.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/* $NetBSD: rf_sstf.c,v 1.6 2001/01/27 20:18:55 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*******************************************************************************
- *
- * sstf.c -- prioritized shortest seek time first disk queueing code
- *
- ******************************************************************************/
-
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_stripelocks.h>
-#include <dev/raidframe/rf_layout.h>
-#include <dev/raidframe/rf_diskqueue.h>
-#include <dev/raidframe/rf_sstf.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_options.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_types.h>
-
-#define DIR_LEFT 1
-#define DIR_RIGHT 2
-#define DIR_EITHER 3
-
-#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_)))
-
-#define QSUM(_sstfq_) (((_sstfq_)->lopri.qlen)+((_sstfq_)->left.qlen)+((_sstfq_)->right.qlen))
-
-
-static void
-do_sstf_ord_q(RF_DiskQueueData_t **,
- RF_DiskQueueData_t **,
- RF_DiskQueueData_t *);
-
-static RF_DiskQueueData_t *
-closest_to_arm(RF_SstfQ_t *,
- RF_SectorNum_t,
- int *,
- int);
-static void do_dequeue(RF_SstfQ_t *, RF_DiskQueueData_t *);
-
-
-static void
-do_sstf_ord_q(queuep, tailp, req)
- RF_DiskQueueData_t **queuep;
- RF_DiskQueueData_t **tailp;
- RF_DiskQueueData_t *req;
-{
- RF_DiskQueueData_t *r, *s;
-
- if (*queuep == NULL) {
- *queuep = req;
- *tailp = req;
- req->next = NULL;
- req->prev = NULL;
- return;
- }
- if (req->sectorOffset <= (*queuep)->sectorOffset) {
- req->next = *queuep;
- req->prev = NULL;
- (*queuep)->prev = req;
- *queuep = req;
- return;
- }
- if (req->sectorOffset > (*tailp)->sectorOffset) {
- /* optimization */
- r = NULL;
- s = *tailp;
- goto q_at_end;
- }
- for (s = NULL, r = *queuep; r; s = r, r = r->next) {
- if (r->sectorOffset >= req->sectorOffset) {
- /* insert after s, before r */
- RF_ASSERT(s);
- req->next = r;
- r->prev = req;
- s->next = req;
- req->prev = s;
- return;
- }
- }
-q_at_end:
- /* insert after s, at end of queue */
- RF_ASSERT(r == NULL);
- RF_ASSERT(s);
- RF_ASSERT(s == (*tailp));
- req->next = NULL;
- req->prev = s;
- s->next = req;
- *tailp = req;
-}
-/* for removing from head-of-queue */
-#define DO_HEAD_DEQ(_r_,_q_) { \
- _r_ = (_q_)->queue; \
- RF_ASSERT((_r_) != NULL); \
- (_q_)->queue = (_r_)->next; \
- (_q_)->qlen--; \
- if ((_q_)->qlen == 0) { \
- RF_ASSERT((_r_) == (_q_)->qtail); \
- RF_ASSERT((_q_)->queue == NULL); \
- (_q_)->qtail = NULL; \
- } \
- else { \
- RF_ASSERT((_q_)->queue->prev == (_r_)); \
- (_q_)->queue->prev = NULL; \
- } \
-}
-
-/* for removing from end-of-queue */
-#define DO_TAIL_DEQ(_r_,_q_) { \
- _r_ = (_q_)->qtail; \
- RF_ASSERT((_r_) != NULL); \
- (_q_)->qtail = (_r_)->prev; \
- (_q_)->qlen--; \
- if ((_q_)->qlen == 0) { \
- RF_ASSERT((_r_) == (_q_)->queue); \
- RF_ASSERT((_q_)->qtail == NULL); \
- (_q_)->queue = NULL; \
- } \
- else { \
- RF_ASSERT((_q_)->qtail->next == (_r_)); \
- (_q_)->qtail->next = NULL; \
- } \
-}
-
-#define DO_BEST_DEQ(_l_,_r_,_q_) { \
- if (SNUM_DIFF((_q_)->queue->sectorOffset,_l_) \
- < SNUM_DIFF((_q_)->qtail->sectorOffset,_l_)) \
- { \
- DO_HEAD_DEQ(_r_,_q_); \
- } \
- else { \
- DO_TAIL_DEQ(_r_,_q_); \
- } \
-}
-
-static RF_DiskQueueData_t *
-closest_to_arm(queue, arm_pos, dir, allow_reverse)
- RF_SstfQ_t *queue;
- RF_SectorNum_t arm_pos;
- int *dir;
- int allow_reverse;
-{
- RF_SectorNum_t best_pos_l = 0, this_pos_l = 0, last_pos = 0;
- RF_SectorNum_t best_pos_r = 0, this_pos_r = 0;
- RF_DiskQueueData_t *r, *best_l, *best_r;
-
- best_r = best_l = NULL;
- for (r = queue->queue; r; r = r->next) {
- if (r->sectorOffset < arm_pos) {
- if (best_l == NULL) {
- best_l = r;
- last_pos = best_pos_l = this_pos_l;
- } else {
- this_pos_l = arm_pos - r->sectorOffset;
- if (this_pos_l < best_pos_l) {
- best_l = r;
- last_pos = best_pos_l = this_pos_l;
- } else {
- last_pos = this_pos_l;
- }
- }
- } else {
- if (best_r == NULL) {
- best_r = r;
- last_pos = best_pos_r = this_pos_r;
- } else {
- this_pos_r = r->sectorOffset - arm_pos;
- if (this_pos_r < best_pos_r) {
- best_r = r;
- last_pos = best_pos_r = this_pos_r;
- } else {
- last_pos = this_pos_r;
- }
- if (this_pos_r > last_pos) {
- /* getting farther away */
- break;
- }
- }
- }
- }
- if ((best_r == NULL) && (best_l == NULL))
- return (NULL);
- if ((*dir == DIR_RIGHT) && best_r)
- return (best_r);
- if ((*dir == DIR_LEFT) && best_l)
- return (best_l);
- if (*dir == DIR_EITHER) {
- if (best_l == NULL)
- return (best_r);
- if (best_r == NULL)
- return (best_l);
- if (best_pos_r < best_pos_l)
- return (best_r);
- else
- return (best_l);
- }
- /*
- * Nothing in the direction we want to go. Reverse or
- * reset the arm. We know we have an I/O in the other
- * direction.
- */
- if (allow_reverse) {
- if (*dir == DIR_RIGHT) {
- *dir = DIR_LEFT;
- return (best_l);
- } else {
- *dir = DIR_RIGHT;
- return (best_r);
- }
- }
- /*
- * Reset (beginning of queue).
- */
- RF_ASSERT(*dir == DIR_RIGHT);
- return (queue->queue);
-}
-
-void *
-rf_SstfCreate(sect_per_disk, cl_list, listp)
- RF_SectorCount_t sect_per_disk;
- RF_AllocListElem_t *cl_list;
- RF_ShutdownList_t **listp;
-{
- RF_Sstf_t *sstfq;
-
- RF_CallocAndAdd(sstfq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list);
- sstfq->dir = DIR_EITHER;
- sstfq->allow_reverse = 1;
- return ((void *) sstfq);
-}
-
-void *
-rf_ScanCreate(sect_per_disk, cl_list, listp)
- RF_SectorCount_t sect_per_disk;
- RF_AllocListElem_t *cl_list;
- RF_ShutdownList_t **listp;
-{
- RF_Sstf_t *scanq;
-
- RF_CallocAndAdd(scanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list);
- scanq->dir = DIR_RIGHT;
- scanq->allow_reverse = 1;
- return ((void *) scanq);
-}
-
-void *
-rf_CscanCreate(sect_per_disk, cl_list, listp)
- RF_SectorCount_t sect_per_disk;
- RF_AllocListElem_t *cl_list;
- RF_ShutdownList_t **listp;
-{
- RF_Sstf_t *cscanq;
-
- RF_CallocAndAdd(cscanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list);
- cscanq->dir = DIR_RIGHT;
- return ((void *) cscanq);
-}
-
-void
-rf_SstfEnqueue(qptr, req, priority)
- void *qptr;
- RF_DiskQueueData_t *req;
- int priority;
-{
- RF_Sstf_t *sstfq;
-
- sstfq = (RF_Sstf_t *) qptr;
-
- if (priority == RF_IO_LOW_PRIORITY) {
- if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) {
- RF_DiskQueue_t *dq;
- dq = (RF_DiskQueue_t *) req->queue;
- printf("raid%d: ENQ lopri %d,%d queues are %d,%d,%d\n",
- req->raidPtr->raidid,
- dq->row, dq->col,
- sstfq->left.qlen, sstfq->right.qlen,
- sstfq->lopri.qlen);
- }
- do_sstf_ord_q(&sstfq->lopri.queue, &sstfq->lopri.qtail, req);
- sstfq->lopri.qlen++;
- } else {
- if (req->sectorOffset < sstfq->last_sector) {
- do_sstf_ord_q(&sstfq->left.queue, &sstfq->left.qtail, req);
- sstfq->left.qlen++;
- } else {
- do_sstf_ord_q(&sstfq->right.queue, &sstfq->right.qtail, req);
- sstfq->right.qlen++;
- }
- }
-}
-
-static void
-do_dequeue(queue, req)
- RF_SstfQ_t *queue;
- RF_DiskQueueData_t *req;
-{
- RF_DiskQueueData_t *req2;
-
- if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) {
- printf("raid%d: do_dequeue\n", req->raidPtr->raidid);
- }
- if (req == queue->queue) {
- DO_HEAD_DEQ(req2, queue);
- RF_ASSERT(req2 == req);
- } else
- if (req == queue->qtail) {
- DO_TAIL_DEQ(req2, queue);
- RF_ASSERT(req2 == req);
- } else {
- /* dequeue from middle of list */
- RF_ASSERT(req->next);
- RF_ASSERT(req->prev);
- queue->qlen--;
- req->next->prev = req->prev;
- req->prev->next = req->next;
- req->next = req->prev = NULL;
- }
-}
-
-RF_DiskQueueData_t *
-rf_SstfDequeue(qptr)
- void *qptr;
-{
- RF_DiskQueueData_t *req = NULL;
- RF_Sstf_t *sstfq;
-
- sstfq = (RF_Sstf_t *) qptr;
-
- if (rf_sstfDebug) {
- RF_DiskQueue_t *dq;
- dq = (RF_DiskQueue_t *) req->queue;
- RF_ASSERT(QSUM(sstfq) == dq->queueLength);
- printf("raid%d: sstf: Dequeue %d,%d queues are %d,%d,%d\n",
- req->raidPtr->raidid, dq->row, dq->col,
- sstfq->left.qlen, sstfq->right.qlen, sstfq->lopri.qlen);
- }
- if (sstfq->left.queue == NULL) {
- RF_ASSERT(sstfq->left.qlen == 0);
- if (sstfq->right.queue == NULL) {
- RF_ASSERT(sstfq->right.qlen == 0);
- if (sstfq->lopri.queue == NULL) {
- RF_ASSERT(sstfq->lopri.qlen == 0);
- return (NULL);
- }
- if (rf_sstfDebug) {
- printf("raid%d: sstf: check for close lopri",
- req->raidPtr->raidid);
- }
- req = closest_to_arm(&sstfq->lopri, sstfq->last_sector,
- &sstfq->dir, sstfq->allow_reverse);
- if (rf_sstfDebug) {
- printf("raid%d: sstf: closest_to_arm said %lx",
- req->raidPtr->raidid, (long) req);
- }
- if (req == NULL)
- return (NULL);
- do_dequeue(&sstfq->lopri, req);
- } else {
- DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->right);
- }
- } else {
- if (sstfq->right.queue == NULL) {
- RF_ASSERT(sstfq->right.qlen == 0);
- DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->left);
- } else {
- if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset)
- < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) {
- DO_HEAD_DEQ(req, &sstfq->right);
- } else {
- DO_TAIL_DEQ(req, &sstfq->left);
- }
- }
- }
- RF_ASSERT(req);
- sstfq->last_sector = req->sectorOffset;
- return (req);
-}
-
-RF_DiskQueueData_t *
-rf_ScanDequeue(qptr)
- void *qptr;
-{
- RF_DiskQueueData_t *req = NULL;
- RF_Sstf_t *scanq;
-
- scanq = (RF_Sstf_t *) qptr;
-
- if (rf_scanDebug) {
- RF_DiskQueue_t *dq;
- dq = (RF_DiskQueue_t *) req->queue;
- RF_ASSERT(QSUM(scanq) == dq->queueLength);
- printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n",
- req->raidPtr->raidid, dq->row, dq->col,
- scanq->left.qlen, scanq->right.qlen, scanq->lopri.qlen);
- }
- if (scanq->left.queue == NULL) {
- RF_ASSERT(scanq->left.qlen == 0);
- if (scanq->right.queue == NULL) {
- RF_ASSERT(scanq->right.qlen == 0);
- if (scanq->lopri.queue == NULL) {
- RF_ASSERT(scanq->lopri.qlen == 0);
- return (NULL);
- }
- req = closest_to_arm(&scanq->lopri, scanq->last_sector,
- &scanq->dir, scanq->allow_reverse);
- if (req == NULL)
- return (NULL);
- do_dequeue(&scanq->lopri, req);
- } else {
- scanq->dir = DIR_RIGHT;
- DO_HEAD_DEQ(req, &scanq->right);
- }
- } else
- if (scanq->right.queue == NULL) {
- RF_ASSERT(scanq->right.qlen == 0);
- RF_ASSERT(scanq->left.queue);
- scanq->dir = DIR_LEFT;
- DO_TAIL_DEQ(req, &scanq->left);
- } else {
- RF_ASSERT(scanq->right.queue);
- RF_ASSERT(scanq->left.queue);
- if (scanq->dir == DIR_RIGHT) {
- DO_HEAD_DEQ(req, &scanq->right);
- } else {
- DO_TAIL_DEQ(req, &scanq->left);
- }
- }
- RF_ASSERT(req);
- scanq->last_sector = req->sectorOffset;
- return (req);
-}
-
-RF_DiskQueueData_t *
-rf_CscanDequeue(qptr)
- void *qptr;
-{
- RF_DiskQueueData_t *req = NULL;
- RF_Sstf_t *cscanq;
-
- cscanq = (RF_Sstf_t *) qptr;
-
- RF_ASSERT(cscanq->dir == DIR_RIGHT);
- if (rf_cscanDebug) {
- RF_DiskQueue_t *dq;
- dq = (RF_DiskQueue_t *) req->queue;
- RF_ASSERT(QSUM(cscanq) == dq->queueLength);
- printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n",
- req->raidPtr->raidid, dq->row, dq->col,
- cscanq->left.qlen, cscanq->right.qlen,
- cscanq->lopri.qlen);
- }
- if (cscanq->right.queue) {
- DO_HEAD_DEQ(req, &cscanq->right);
- } else {
- RF_ASSERT(cscanq->right.qlen == 0);
- if (cscanq->left.queue == NULL) {
- RF_ASSERT(cscanq->left.qlen == 0);
- if (cscanq->lopri.queue == NULL) {
- RF_ASSERT(cscanq->lopri.qlen == 0);
- return (NULL);
- }
- req = closest_to_arm(&cscanq->lopri, cscanq->last_sector,
- &cscanq->dir, cscanq->allow_reverse);
- if (req == NULL)
- return (NULL);
- do_dequeue(&cscanq->lopri, req);
- } else {
- /*
- * There's I/Os to the left of the arm. Swing
- * on back (swap queues).
- */
- cscanq->right = cscanq->left;
- cscanq->left.qlen = 0;
- cscanq->left.queue = cscanq->left.qtail = NULL;
- DO_HEAD_DEQ(req, &cscanq->right);
- }
- }
- RF_ASSERT(req);
- cscanq->last_sector = req->sectorOffset;
- return (req);
-}
-
-RF_DiskQueueData_t *
-rf_SstfPeek(qptr)
- void *qptr;
-{
- RF_DiskQueueData_t *req;
- RF_Sstf_t *sstfq;
-
- sstfq = (RF_Sstf_t *) qptr;
-
- if ((sstfq->left.queue == NULL) && (sstfq->right.queue == NULL)) {
- req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, &sstfq->dir,
- sstfq->allow_reverse);
- } else {
- if (sstfq->left.queue == NULL)
- req = sstfq->right.queue;
- else {
- if (sstfq->right.queue == NULL)
- req = sstfq->left.queue;
- else {
- if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset)
- < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) {
- req = sstfq->right.queue;
- } else {
- req = sstfq->left.qtail;
- }
- }
- }
- }
- if (req == NULL) {
- RF_ASSERT(QSUM(sstfq) == 0);
- }
- return (req);
-}
-
-RF_DiskQueueData_t *
-rf_ScanPeek(qptr)
- void *qptr;
-{
- RF_DiskQueueData_t *req;
- RF_Sstf_t *scanq;
- int dir;
-
- scanq = (RF_Sstf_t *) qptr;
- dir = scanq->dir;
-
- if (scanq->left.queue == NULL) {
- RF_ASSERT(scanq->left.qlen == 0);
- if (scanq->right.queue == NULL) {
- RF_ASSERT(scanq->right.qlen == 0);
- if (scanq->lopri.queue == NULL) {
- RF_ASSERT(scanq->lopri.qlen == 0);
- return (NULL);
- }
- req = closest_to_arm(&scanq->lopri, scanq->last_sector,
- &dir, scanq->allow_reverse);
- } else {
- req = scanq->right.queue;
- }
- } else
- if (scanq->right.queue == NULL) {
- RF_ASSERT(scanq->right.qlen == 0);
- RF_ASSERT(scanq->left.queue);
- req = scanq->left.qtail;
- } else {
- RF_ASSERT(scanq->right.queue);
- RF_ASSERT(scanq->left.queue);
- if (scanq->dir == DIR_RIGHT) {
- req = scanq->right.queue;
- } else {
- req = scanq->left.qtail;
- }
- }
- if (req == NULL) {
- RF_ASSERT(QSUM(scanq) == 0);
- }
- return (req);
-}
-
-RF_DiskQueueData_t *
-rf_CscanPeek(qptr)
- void *qptr;
-{
- RF_DiskQueueData_t *req;
- RF_Sstf_t *cscanq;
-
- cscanq = (RF_Sstf_t *) qptr;
-
- RF_ASSERT(cscanq->dir == DIR_RIGHT);
- if (cscanq->right.queue) {
- req = cscanq->right.queue;
- } else {
- RF_ASSERT(cscanq->right.qlen == 0);
- if (cscanq->left.queue == NULL) {
- RF_ASSERT(cscanq->left.qlen == 0);
- if (cscanq->lopri.queue == NULL) {
- RF_ASSERT(cscanq->lopri.qlen == 0);
- return (NULL);
- }
- req = closest_to_arm(&cscanq->lopri, cscanq->last_sector,
- &cscanq->dir, cscanq->allow_reverse);
- } else {
- /*
- * There's I/Os to the left of the arm. We'll end
- * up swinging on back.
- */
- req = cscanq->left.queue;
- }
- }
- if (req == NULL) {
- RF_ASSERT(QSUM(cscanq) == 0);
- }
- return (req);
-}
-
-int
-rf_SstfPromote(qptr, parityStripeID, which_ru)
- void *qptr;
- RF_StripeNum_t parityStripeID;
- RF_ReconUnitNum_t which_ru;
-{
- RF_DiskQueueData_t *r, *next;
- RF_Sstf_t *sstfq;
- int n;
-
- sstfq = (RF_Sstf_t *) qptr;
-
- n = 0;
- for (r = sstfq->lopri.queue; r; r = next) {
- next = r->next;
- if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) {
- printf("raid%d: check promote %lx\n",
- r->raidPtr->raidid, (long) r);
- }
- if ((r->parityStripeID == parityStripeID)
- && (r->which_ru == which_ru)) {
- do_dequeue(&sstfq->lopri, r);
- rf_SstfEnqueue(qptr, r, RF_IO_NORMAL_PRIORITY);
- n++;
- }
- }
- if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) {
- printf("raid%d: promoted %d matching I/Os queues are %d,%d,%d\n",
- r->raidPtr->raidid, n, sstfq->left.qlen,
- sstfq->right.qlen, sstfq->lopri.qlen);
- }
- return (n);
-}
diff --git a/sys/dev/raidframe/rf_sstf.h b/sys/dev/raidframe/rf_sstf.h
deleted file mode 100644
index 2fc1c0d..0000000
--- a/sys/dev/raidframe/rf_sstf.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_sstf.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_SSTF_H_
-#define _RF__RF_SSTF_H_
-
-#include <dev/raidframe/rf_diskqueue.h>
-
-typedef struct RF_SstfQ_s {
- RF_DiskQueueData_t *queue;
- RF_DiskQueueData_t *qtail;
- int qlen;
-} RF_SstfQ_t;
-
-typedef struct RF_Sstf_s {
- RF_SstfQ_t left;
- RF_SstfQ_t right;
- RF_SstfQ_t lopri;
- RF_SectorNum_t last_sector;
- int dir;
- int allow_reverse;
-} RF_Sstf_t;
-
-void *
-rf_SstfCreate(RF_SectorCount_t sect_per_disk,
- RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp);
-void *
-rf_ScanCreate(RF_SectorCount_t sect_per_disk,
- RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp);
-void *
-rf_CscanCreate(RF_SectorCount_t sect_per_disk,
- RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp);
-void rf_SstfEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority);
-RF_DiskQueueData_t *rf_SstfDequeue(void *qptr);
-RF_DiskQueueData_t *rf_SstfPeek(void *qptr);
-int
-rf_SstfPromote(void *qptr, RF_StripeNum_t parityStripeID,
- RF_ReconUnitNum_t which_ru);
-RF_DiskQueueData_t *rf_ScanDequeue(void *qptr);
-RF_DiskQueueData_t *rf_ScanPeek(void *qptr);
-RF_DiskQueueData_t *rf_CscanDequeue(void *qptr);
-RF_DiskQueueData_t *rf_CscanPeek(void *qptr);
-
-#endif /* !_RF__RF_SSTF_H_ */
diff --git a/sys/dev/raidframe/rf_states.c b/sys/dev/raidframe/rf_states.c
deleted file mode 100644
index bc686ec..0000000
--- a/sys/dev/raidframe/rf_states.c
+++ /dev/null
@@ -1,669 +0,0 @@
-/* $NetBSD: rf_states.c,v 1.15 2000/10/20 02:24:45 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, William V. Courtright II, Robby Findler
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <sys/errno.h>
-
-#include <dev/raidframe/rf_archs.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_dag.h>
-#include <dev/raidframe/rf_desc.h>
-#include <dev/raidframe/rf_aselect.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_states.h>
-#include <dev/raidframe/rf_dagutils.h>
-#include <dev/raidframe/rf_driver.h>
-#include <dev/raidframe/rf_engine.h>
-#include <dev/raidframe/rf_map.h>
-#include <dev/raidframe/rf_etimer.h>
-#include <dev/raidframe/rf_kintf.h>
-
-/* prototypes for some of the available states.
-
- States must:
-
- - not block.
-
- - either schedule rf_ContinueRaidAccess as a callback and return
- RF_TRUE, or complete all of their work and return RF_FALSE.
-
- - increment desc->state when they have finished their work.
-*/
-
-static char *
-StateName(RF_AccessState_t state)
-{
- switch (state) {
- case rf_QuiesceState:return "QuiesceState";
- case rf_MapState:
- return "MapState";
- case rf_LockState:
- return "LockState";
- case rf_CreateDAGState:
- return "CreateDAGState";
- case rf_ExecuteDAGState:
- return "ExecuteDAGState";
- case rf_ProcessDAGState:
- return "ProcessDAGState";
- case rf_CleanupState:
- return "CleanupState";
- case rf_LastState:
- return "LastState";
- case rf_IncrAccessesCountState:
- return "IncrAccessesCountState";
- case rf_DecrAccessesCountState:
- return "DecrAccessesCountState";
- default:
- return "!!! UnnamedState !!!";
- }
-}
-
-void
-rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc)
-{
- int suspended = RF_FALSE;
- int current_state_index = desc->state;
- RF_AccessState_t current_state = desc->states[current_state_index];
- int unit = desc->raidPtr->raidid;
-
- do {
-
- current_state_index = desc->state;
- current_state = desc->states[current_state_index];
-
- switch (current_state) {
-
- case rf_QuiesceState:
- suspended = rf_State_Quiesce(desc);
- break;
- case rf_IncrAccessesCountState:
- suspended = rf_State_IncrAccessCount(desc);
- break;
- case rf_MapState:
- suspended = rf_State_Map(desc);
- break;
- case rf_LockState:
- suspended = rf_State_Lock(desc);
- break;
- case rf_CreateDAGState:
- suspended = rf_State_CreateDAG(desc);
- break;
- case rf_ExecuteDAGState:
- suspended = rf_State_ExecuteDAG(desc);
- break;
- case rf_ProcessDAGState:
- suspended = rf_State_ProcessDAG(desc);
- break;
- case rf_CleanupState:
- suspended = rf_State_Cleanup(desc);
- break;
- case rf_DecrAccessesCountState:
- suspended = rf_State_DecrAccessCount(desc);
- break;
- case rf_LastState:
- suspended = rf_State_LastState(desc);
- break;
- }
-
- /* after this point, we cannot dereference desc since desc may
- * have been freed. desc is only freed in LastState, so if we
- * renter this function or loop back up, desc should be valid. */
-
- if (rf_printStatesDebug) {
- printf("raid%d: State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
- unit, StateName(current_state),
- current_state_index, (long) desc,
- suspended ? "callback scheduled" : "looping");
- }
- } while (!suspended && current_state != rf_LastState);
-
- return;
-}
-
-
-void
-rf_ContinueDagAccess(RF_DagList_t * dagList)
-{
- RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
- RF_RaidAccessDesc_t *desc;
- RF_DagHeader_t *dag_h;
- RF_Etimer_t timer;
- int i;
-
- desc = dagList->desc;
-
- timer = tracerec->timer;
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
- RF_ETIMER_START(tracerec->timer);
-
- /* skip to dag which just finished */
- dag_h = dagList->dags;
- for (i = 0; i < dagList->numDagsDone; i++) {
- dag_h = dag_h->next;
- }
-
- /* check to see if retry is required */
- if (dag_h->status == rf_rollBackward) {
- /* when a dag fails, mark desc status as bad and allow all
- * other dags in the desc to execute to completion. then,
- * free all dags and start over */
- desc->status = 1; /* bad status */
- {
- printf("raid%d: DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
- desc->raidPtr->raidid, desc->type,
- (long) desc->raidAddress,
- (long) desc->raidAddress, (int) desc->numBlocks,
- (int) desc->numBlocks,
- (unsigned long) (desc->bufPtr));
- }
- }
- dagList->numDagsDone++;
- rf_ContinueRaidAccess(desc);
-}
-
-int
-rf_State_LastState(RF_RaidAccessDesc_t * desc)
-{
- void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc;
- RF_CBParam_t callbackArg;
-
- callbackArg.p = desc->callbackArg;
-
- /*
- * If this is not an async request, wake up the caller
- */
- if (desc->async_flag == 0)
- wakeup(desc->bp);
-
- /*
- * That's all the IO for this one... unbusy the 'disk'.
- */
-
- rf_disk_unbusy(desc);
-
- /*
- * Wakeup any requests waiting to go.
- */
-
- RF_LOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
- ((RF_Raid_t *) desc->raidPtr)->openings++;
- RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
-
- /* wake up any pending IO */
- raidstart(((RF_Raid_t *) desc->raidPtr));
-
- /* printf("Calling biodone on 0x%x\n",desc->bp); */
- biodone(desc->bp); /* access came through ioctl */
-
- if (callbackFunc)
- callbackFunc(callbackArg);
- rf_FreeRaidAccDesc(desc);
-
- return RF_FALSE;
-}
-
-int
-rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc)
-{
- RF_Raid_t *raidPtr;
-
- raidPtr = desc->raidPtr;
- /* Bummer. We have to do this to be 100% safe w.r.t. the increment
- * below */
- RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
- raidPtr->accs_in_flight++; /* used to detect quiescence */
- RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
-
- desc->state++;
- return RF_FALSE;
-}
-
-int
-rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc)
-{
- RF_Raid_t *raidPtr;
-
- raidPtr = desc->raidPtr;
-
- RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
- raidPtr->accs_in_flight--;
- if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
- rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
- }
- rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
- RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
-
- desc->state++;
- return RF_FALSE;
-}
-
-int
-rf_State_Quiesce(RF_RaidAccessDesc_t * desc)
-{
- RF_AccTraceEntry_t *tracerec = &desc->tracerec;
- RF_Etimer_t timer;
- int suspended = RF_FALSE;
- RF_Raid_t *raidPtr;
-
- raidPtr = desc->raidPtr;
-
- RF_ETIMER_START(timer);
- RF_ETIMER_START(desc->timer);
-
- RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
- if (raidPtr->accesses_suspended) {
- RF_CallbackDesc_t *cb;
- cb = rf_AllocCallbackDesc();
- /* XXX the following cast is quite bogus...
- * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *) as an
- * argument.. GO */
- cb->callbackFunc = (void (*) (RF_CBParam_t)) rf_ContinueRaidAccess;
- cb->callbackArg.p = (void *) desc;
- cb->next = raidPtr->quiesce_wait_list;
- raidPtr->quiesce_wait_list = cb;
- suspended = RF_TRUE;
- }
- RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
-
- if (suspended && rf_quiesceDebug)
- printf("Stalling access due to quiescence lock\n");
-
- desc->state++;
- return suspended;
-}
-
-int
-rf_State_Map(RF_RaidAccessDesc_t * desc)
-{
- RF_Raid_t *raidPtr = desc->raidPtr;
- RF_AccTraceEntry_t *tracerec = &desc->tracerec;
- RF_Etimer_t timer;
-
- RF_ETIMER_START(timer);
-
- if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
- desc->bufPtr, RF_DONT_REMAP)))
- RF_PANIC();
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
-
- desc->state++;
- return RF_FALSE;
-}
-
-int
-rf_State_Lock(RF_RaidAccessDesc_t * desc)
-{
- RF_AccTraceEntry_t *tracerec = &desc->tracerec;
- RF_Raid_t *raidPtr = desc->raidPtr;
- RF_AccessStripeMapHeader_t *asmh = desc->asmap;
- RF_AccessStripeMap_t *asm_p;
- RF_Etimer_t timer;
- int suspended = RF_FALSE;
-
- RF_ETIMER_START(timer);
- if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
- RF_StripeNum_t lastStripeID = -1;
-
- /* acquire each lock that we don't already hold */
- for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
- RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
- if (!rf_suppressLocksAndLargeWrites &&
- asm_p->parityInfo &&
- !(desc->flags & RF_DAG_SUPPRESS_LOCKS) &&
- !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) {
- asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
- RF_ASSERT(asm_p->stripeID > lastStripeID);
-
- /* locks must be acquired hierarchically */
-
- lastStripeID = asm_p->stripeID;
- /* XXX the cast to (void (*)(RF_CBParam_t))
- * below is bogus! GO */
- RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc,
- desc->type,
- (void (*) (RF_Buf_t)) rf_ContinueRaidAccess,
- desc, asm_p,
- raidPtr->Layout.dataSectorsPerStripe);
- if (rf_AcquireStripeLock(raidPtr->lockTable,
- asm_p->stripeID, &asm_p->lockReqDesc)) {
- suspended = RF_TRUE;
- break;
- }
- }
- if (desc->type == RF_IO_TYPE_WRITE &&
- raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing) {
- if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) {
- int val;
-
- asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
- /* XXX the cast below is quite
- * bogus!!! XXX GO */
- val = rf_ForceOrBlockRecon(raidPtr, asm_p,
- (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess, desc);
- if (val == 0) {
- asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
- } else {
- suspended = RF_TRUE;
- break;
- }
- } else {
- if (rf_pssDebug) {
- printf("raid%d: skipping force/block because already done, psid %ld\n",
- desc->raidPtr->raidid,
- (long) asm_p->stripeID);
- }
- }
- } else {
- if (rf_pssDebug) {
- printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n",
- desc->raidPtr->raidid,
- (long) asm_p->stripeID);
- }
- }
- }
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
-
- if (suspended)
- return (RF_TRUE);
- }
- desc->state++;
- return (RF_FALSE);
-}
-/*
- * the following three states create, execute, and post-process dags
- * the error recovery unit is a single dag.
- * by default, SelectAlgorithm creates an array of dags, one per parity stripe
- * in some tricky cases, multiple dags per stripe are created
- * - dags within a parity stripe are executed sequentially (arbitrary order)
- * - dags for distinct parity stripes are executed concurrently
- *
- * repeat until all dags complete successfully -or- dag selection fails
- *
- * while !done
- * create dag(s) (SelectAlgorithm)
- * if dag
- * execute dag (DispatchDAG)
- * if dag successful
- * done (SUCCESS)
- * else
- * !done (RETRY - start over with new dags)
- * else
- * done (FAIL)
- */
-int
-rf_State_CreateDAG(RF_RaidAccessDesc_t * desc)
-{
- RF_AccTraceEntry_t *tracerec = &desc->tracerec;
- RF_Etimer_t timer;
- RF_DagHeader_t *dag_h;
- int i, selectStatus;
-
- /* generate a dag for the access, and fire it off. When the dag
- * completes, we'll get re-invoked in the next state. */
- RF_ETIMER_START(timer);
- /* SelectAlgorithm returns one or more dags */
- selectStatus = rf_SelectAlgorithm(desc, desc->flags | RF_DAG_SUPPRESS_LOCKS);
- if (rf_printDAGsDebug)
- for (i = 0; i < desc->numStripes; i++)
- rf_PrintDAGList(desc->dagArray[i].dags);
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- /* update time to create all dags */
- tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
-
- desc->status = 0; /* good status */
-
- if (selectStatus) {
- /* failed to create a dag */
- /* this happens when there are too many faults or incomplete
- * dag libraries */
- printf("[Failed to create a DAG]\n");
- RF_PANIC();
- } else {
- /* bind dags to desc */
- for (i = 0; i < desc->numStripes; i++) {
- dag_h = desc->dagArray[i].dags;
- while (dag_h) {
- dag_h->bp = (RF_Buf_t) desc->bp;
- dag_h->tracerec = tracerec;
- dag_h = dag_h->next;
- }
- }
- desc->flags |= RF_DAG_DISPATCH_RETURNED;
- desc->state++; /* next state should be rf_State_ExecuteDAG */
- }
- return RF_FALSE;
-}
-
-
-
-/* the access has an array of dagLists, one dagList per parity stripe.
- * fire the first dag in each parity stripe (dagList).
- * dags within a stripe (dagList) must be executed sequentially
- * - this preserves atomic parity update
- * dags for independents parity groups (stripes) are fired concurrently */
-
-int
-rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc)
-{
- int i;
- RF_DagHeader_t *dag_h;
- RF_DagList_t *dagArray = desc->dagArray;
-
- /* next state is always rf_State_ProcessDAG important to do this
- * before firing the first dag (it may finish before we leave this
- * routine) */
- desc->state++;
-
- /* sweep dag array, a stripe at a time, firing the first dag in each
- * stripe */
- for (i = 0; i < desc->numStripes; i++) {
- RF_ASSERT(dagArray[i].numDags > 0);
- RF_ASSERT(dagArray[i].numDagsDone == 0);
- RF_ASSERT(dagArray[i].numDagsFired == 0);
- RF_ETIMER_START(dagArray[i].tracerec.timer);
- /* fire first dag in this stripe */
- dag_h = dagArray[i].dags;
- RF_ASSERT(dag_h);
- dagArray[i].numDagsFired++;
- /* XXX Yet another case where we pass in a conflicting
- * function pointer :-( XXX GO */
- rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, &dagArray[i]);
- }
-
- /* the DAG will always call the callback, even if there was no
- * blocking, so we are always suspended in this state */
- return RF_TRUE;
-}
-
-
-
-/* rf_State_ProcessDAG is entered when a dag completes.
- * first, check to all dags in the access have completed
- * if not, fire as many dags as possible */
-
-int
-rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc)
-{
- RF_AccessStripeMapHeader_t *asmh = desc->asmap;
- RF_Raid_t *raidPtr = desc->raidPtr;
- RF_DagHeader_t *dag_h;
- int i, j, done = RF_TRUE;
- RF_DagList_t *dagArray = desc->dagArray;
- RF_Etimer_t timer;
-
- /* check to see if this is the last dag */
- for (i = 0; i < desc->numStripes; i++)
- if (dagArray[i].numDags != dagArray[i].numDagsDone)
- done = RF_FALSE;
-
- if (done) {
- if (desc->status) {
- /* a dag failed, retry */
- RF_ETIMER_START(timer);
- /* free all dags */
- for (i = 0; i < desc->numStripes; i++) {
- rf_FreeDAG(desc->dagArray[i].dags);
- }
- rf_MarkFailuresInASMList(raidPtr, asmh);
- /* back up to rf_State_CreateDAG */
- desc->state = desc->state - 2;
- return RF_FALSE;
- } else {
- /* move on to rf_State_Cleanup */
- desc->state++;
- }
- return RF_FALSE;
- } else {
- /* more dags to execute */
- /* see if any are ready to be fired. if so, fire them */
- /* don't fire the initial dag in a list, it's fired in
- * rf_State_ExecuteDAG */
- for (i = 0; i < desc->numStripes; i++) {
- if ((dagArray[i].numDagsDone < dagArray[i].numDags)
- && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
- && (dagArray[i].numDagsFired > 0)) {
- RF_ETIMER_START(dagArray[i].tracerec.timer);
- /* fire next dag in this stripe */
- /* first, skip to next dag awaiting execution */
- dag_h = dagArray[i].dags;
- for (j = 0; j < dagArray[i].numDagsDone; j++)
- dag_h = dag_h->next;
- dagArray[i].numDagsFired++;
- /* XXX and again we pass a different function
- * pointer.. GO */
- rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess,
- &dagArray[i]);
- }
- }
- return RF_TRUE;
- }
-}
-/* only make it this far if all dags complete successfully */
-int
-rf_State_Cleanup(RF_RaidAccessDesc_t * desc)
-{
- RF_AccTraceEntry_t *tracerec = &desc->tracerec;
- RF_AccessStripeMapHeader_t *asmh = desc->asmap;
- RF_Raid_t *raidPtr = desc->raidPtr;
- RF_AccessStripeMap_t *asm_p;
- RF_DagHeader_t *dag_h;
- RF_Etimer_t timer;
- int i;
-
- desc->state++;
-
- timer = tracerec->timer;
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
-
- /* the RAID I/O is complete. Clean up. */
- tracerec->specific.user.dag_retry_us = 0;
-
- RF_ETIMER_START(timer);
- if (desc->flags & RF_DAG_RETURN_DAG) {
- /* copy dags into paramDAG */
- *(desc->paramDAG) = desc->dagArray[0].dags;
- dag_h = *(desc->paramDAG);
- for (i = 1; i < desc->numStripes; i++) {
- /* concatenate dags from remaining stripes */
- RF_ASSERT(dag_h);
- while (dag_h->next)
- dag_h = dag_h->next;
- dag_h->next = desc->dagArray[i].dags;
- }
- } else {
- /* free all dags */
- for (i = 0; i < desc->numStripes; i++) {
- rf_FreeDAG(desc->dagArray[i].dags);
- }
- }
-
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
-
- RF_ETIMER_START(timer);
- if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
- for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
- if (!rf_suppressLocksAndLargeWrites &&
- asm_p->parityInfo &&
- !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) {
- RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
- rf_ReleaseStripeLock(raidPtr->lockTable,
- asm_p->stripeID,
- &asm_p->lockReqDesc);
- }
- if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
- rf_UnblockRecon(raidPtr, asm_p);
- }
- }
- }
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
-
- RF_ETIMER_START(timer);
- if (desc->flags & RF_DAG_RETURN_ASM)
- *(desc->paramASM) = asmh;
- else
- rf_FreeAccessStripeMap(asmh);
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
-
- RF_ETIMER_STOP(desc->timer);
- RF_ETIMER_EVAL(desc->timer);
-
- timer = desc->tracerec.tot_timer;
- RF_ETIMER_STOP(timer);
- RF_ETIMER_EVAL(timer);
- desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
-
- rf_LogTraceRec(raidPtr, tracerec);
-
- desc->flags |= RF_DAG_ACCESS_COMPLETE;
-
- return RF_FALSE;
-}
diff --git a/sys/dev/raidframe/rf_states.h b/sys/dev/raidframe/rf_states.h
deleted file mode 100644
index 6c0aee4..0000000
--- a/sys/dev/raidframe/rf_states.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_states.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, William V. Courtright II, Robby Findler
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#ifndef _RF__RF_STATES_H_
-#define _RF__RF_STATES_H_
-
-#include <dev/raidframe/rf_types.h>
-
-void rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc);
-void rf_ContinueDagAccess(RF_DagList_t * dagList);
-int rf_State_LastState(RF_RaidAccessDesc_t * desc);
-int rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc);
-int rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc);
-int rf_State_Quiesce(RF_RaidAccessDesc_t * desc);
-int rf_State_Map(RF_RaidAccessDesc_t * desc);
-int rf_State_Lock(RF_RaidAccessDesc_t * desc);
-int rf_State_CreateDAG(RF_RaidAccessDesc_t * desc);
-int rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc);
-int rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc);
-int rf_State_Cleanup(RF_RaidAccessDesc_t * desc);
-
-#endif /* !_RF__RF_STATES_H_ */
diff --git a/sys/dev/raidframe/rf_stripelocks.c b/sys/dev/raidframe/rf_stripelocks.c
deleted file mode 100644
index 409c0f4..0000000
--- a/sys/dev/raidframe/rf_stripelocks.c
+++ /dev/null
@@ -1,669 +0,0 @@
-/* $NetBSD: rf_stripelocks.c,v 1.6 2000/12/04 11:35:46 fvdl Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Mark Holland, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * stripelocks.c -- code to lock stripes for read and write access
- *
- * The code distinguishes between read locks and write locks. There can be
- * as many readers to given stripe as desired. When a write request comes
- * in, no further readers are allowed to enter, and all subsequent requests
- * are queued in FIFO order. When a the number of readers goes to zero, the
- * writer is given the lock. When a writer releases the lock, the list of
- * queued requests is scanned, and all readersq up to the next writer are
- * given the lock.
- *
- * The lock table size must be one less than a power of two, but HASH_STRIPEID
- * is the only function that requires this.
- *
- * The code now supports "range locks". When you ask to lock a stripe, you
- * specify a range of addresses in that stripe that you want to lock. When
- * you acquire the lock, you've locked only this range of addresses, and
- * other threads can concurrently read/write any non-overlapping portions
- * of the stripe. The "addresses" that you lock are abstract in that you
- * can pass in anything you like. The expectation is that you'll pass in
- * the range of physical disk offsets of the parity bits you're planning
- * to update. The idea behind this, of course, is to allow sub-stripe
- * locking. The implementation is perhaps not the best imaginable; in the
- * worst case a lock release is O(n^2) in the total number of outstanding
- * requests to a given stripe. Note that if you're striping with a
- * stripe unit size equal to an entire disk (i.e. not striping), there will
- * be only one stripe and you may spend some significant number of cycles
- * searching through stripe lock descriptors.
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_raid.h>
-#include <dev/raidframe/rf_stripelocks.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_freelist.h>
-#include <dev/raidframe/rf_debugprint.h>
-#include <dev/raidframe/rf_driver.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-#define Dprintf1(s,a) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf2(s,a,b) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
-#define Dprintf3(s,a,b,c) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
-#define Dprintf4(s,a,b,c,d) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL)
-#define Dprintf5(s,a,b,c,d,e) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL)
-#define Dprintf6(s,a,b,c,d,e,f) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL)
-#define Dprintf7(s,a,b,c,d,e,f,g) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL)
-#define Dprintf8(s,a,b,c,d,e,f,g,h) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),(void *)((unsigned long)h))
-
-#define FLUSH
-
-#define HASH_STRIPEID(_sid_) ( (_sid_) & (rf_lockTableSize-1) )
-
-static void AddToWaitersQueue(RF_LockTableEntry_t * lockTable, RF_StripeLockDesc_t * lockDesc, RF_LockReqDesc_t * lockReqDesc);
-static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID);
-static void FreeStripeLockDesc(RF_StripeLockDesc_t * p);
-static void PrintLockedStripes(RF_LockTableEntry_t * lockTable);
-
-/* determines if two ranges overlap. always yields false if either start value is negative */
-#define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2) \
- ( (_strt1 >= 0) && (_strt2 >= 0) && (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2)) )
-
-/* determines if any of the ranges specified in the two lock descriptors overlap each other */
-#define RANGE_OVERLAP(_cand, _pred) \
- ( SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start, (_pred)->stop ) || \
- SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start, (_pred)->stop ) || \
- SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start2, (_pred)->stop2) || \
- SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start2, (_pred)->stop2) )
-
-/* Determines if a candidate lock request conflicts with a predecessor lock req.
- * Note that the arguments are not interchangeable.
- * The rules are:
- * a candidate read conflicts with a predecessor write if any ranges overlap
- * a candidate write conflicts with a predecessor read if any ranges overlap
- * a candidate write conflicts with a predecessor write if any ranges overlap
- */
-#define STRIPELOCK_CONFLICT(_cand, _pred) \
- RANGE_OVERLAP((_cand), (_pred)) && \
- ( ( (((_cand)->type == RF_IO_TYPE_READ) && ((_pred)->type == RF_IO_TYPE_WRITE)) || \
- (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_READ)) || \
- (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_WRITE)) \
- ) \
- )
-
-static RF_FreeList_t *rf_stripelock_freelist;
-#define RF_MAX_FREE_STRIPELOCK 128
-#define RF_STRIPELOCK_INC 8
-#define RF_STRIPELOCK_INITIAL 32
-
-static void rf_ShutdownStripeLockFreeList(void *);
-static void rf_RaidShutdownStripeLocks(void *);
-
-static void
-rf_ShutdownStripeLockFreeList(ignored)
- void *ignored;
-{
- RF_FREELIST_DESTROY(rf_stripelock_freelist, next, (RF_StripeLockDesc_t *));
-}
-
-int
-rf_ConfigureStripeLockFreeList(listp)
- RF_ShutdownList_t **listp;
-{
- unsigned mask;
- int rc;
-
- RF_FREELIST_CREATE(rf_stripelock_freelist, RF_MAX_FREE_STRIPELOCK,
- RF_STRIPELOCK_INITIAL, sizeof(RF_StripeLockDesc_t));
- rc = rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
- __FILE__, __LINE__, rc);
- rf_ShutdownStripeLockFreeList(NULL);
- return (rc);
- }
- RF_FREELIST_PRIME(rf_stripelock_freelist, RF_STRIPELOCK_INITIAL, next,
- (RF_StripeLockDesc_t *));
- for (mask = 0x1; mask; mask <<= 1)
- if (rf_lockTableSize == mask)
- break;
- if (!mask) {
- printf("[WARNING: lock table size must be a power of two. Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE);
- rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE;
- }
- return (0);
-}
-
-RF_LockTableEntry_t *
-rf_MakeLockTable()
-{
- RF_LockTableEntry_t *lockTable;
- int i, rc;
-
- RF_Calloc(lockTable, ((int) rf_lockTableSize), sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *));
- if (lockTable == NULL)
- return (NULL);
- for (i = 0; i < rf_lockTableSize; i++) {
- rc = rf_mutex_init(&lockTable[i].mutex, __FUNCTION__);
- if (rc) {
- RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
- __LINE__, rc);
- /* XXX clean up other mutexes */
- return (NULL);
- }
- }
- return (lockTable);
-}
-
-void
-rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable)
-{
- int i;
-
- if (rf_stripeLockDebug) {
- PrintLockedStripes(lockTable);
- }
- for (i = 0; i < rf_lockTableSize; i++) {
- rf_mutex_destroy(&lockTable[i].mutex);
- }
- RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t));
-}
-
-static void
-rf_RaidShutdownStripeLocks(arg)
- void *arg;
-{
- RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
- rf_ShutdownStripeLocks(raidPtr->lockTable);
-}
-
-int
-rf_ConfigureStripeLocks(
- RF_ShutdownList_t ** listp,
- RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr)
-{
- int rc;
-
- raidPtr->lockTable = rf_MakeLockTable();
- if (raidPtr->lockTable == NULL)
- return (ENOMEM);
- rc = rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr);
- if (rc) {
- RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
- __FILE__, __LINE__, rc);
- rf_ShutdownStripeLocks(raidPtr->lockTable);
- return (rc);
- }
- return (0);
-}
-/* returns 0 if you've got the lock, and non-zero if you have to wait.
- * if and only if you have to wait, we'll cause cbFunc to get invoked
- * with cbArg when you are granted the lock. We store a tag in *releaseTag
- * that you need to give back to us when you release the lock.
- */
-int
-rf_AcquireStripeLock(
- RF_LockTableEntry_t * lockTable,
- RF_StripeNum_t stripeID,
- RF_LockReqDesc_t * lockReqDesc)
-{
- RF_StripeLockDesc_t *lockDesc;
- RF_LockReqDesc_t *p;
- int tid = 0, hashval = HASH_STRIPEID(stripeID);
- int retcode = 0;
-
- RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type));
-
- if (rf_stripeLockDebug) {
- if (stripeID == -1)
- Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n", tid);
- else {
- Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n",
- tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start,
- lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2);
- Dprintf3("[%d] lock %ld hashval %d\n", tid, stripeID, hashval);
- FLUSH;
- }
- }
- if (stripeID == -1)
- return (0);
- lockReqDesc->next = NULL; /* just to be sure */
-
- RF_LOCK_MUTEX(lockTable[hashval].mutex);
- for (lockDesc = lockTable[hashval].descList; lockDesc; lockDesc = lockDesc->next) {
- if (lockDesc->stripeID == stripeID)
- break;
- }
-
- if (!lockDesc) { /* no entry in table => no one reading or
- * writing */
- lockDesc = AllocStripeLockDesc(stripeID);
- lockDesc->next = lockTable[hashval].descList;
- lockTable[hashval].descList = lockDesc;
- if (lockReqDesc->type == RF_IO_TYPE_WRITE)
- lockDesc->nWriters++;
- lockDesc->granted = lockReqDesc;
- if (rf_stripeLockDebug) {
- Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n",
- tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2);
- FLUSH;
- }
- } else {
-
- if (lockReqDesc->type == RF_IO_TYPE_WRITE)
- lockDesc->nWriters++;
-
- if (lockDesc->nWriters == 0) { /* no need to search any lists
- * if there are no writers
- * anywhere */
- lockReqDesc->next = lockDesc->granted;
- lockDesc->granted = lockReqDesc;
- if (rf_stripeLockDebug) {
- Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n",
- tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2);
- FLUSH;
- }
- } else {
-
- /* search the granted & waiting lists for a conflict.
- * stop searching as soon as we find one */
- retcode = 0;
- for (p = lockDesc->granted; p; p = p->next)
- if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {
- retcode = 1;
- break;
- }
- if (!retcode)
- for (p = lockDesc->waitersH; p; p = p->next)
- if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {
- retcode = 2;
- break;
- }
- if (!retcode) {
- lockReqDesc->next = lockDesc->granted; /* no conflicts found =>
- * grant lock */
- lockDesc->granted = lockReqDesc;
- if (rf_stripeLockDebug) {
- Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n",
- tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop,
- lockReqDesc->start2, lockReqDesc->stop2);
- FLUSH;
- }
- } else {
- if (rf_stripeLockDebug) {
- Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n",
- tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop,
- hashval);
- Dprintf3("[%d] lock %ld retcode=%d\n", tid, stripeID, retcode);
- FLUSH;
- }
- AddToWaitersQueue(lockTable, lockDesc, lockReqDesc); /* conflict => the
- * current access must
- * wait */
- }
- }
- }
-
- RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
- return (retcode);
-}
-
-void
-rf_ReleaseStripeLock(
- RF_LockTableEntry_t * lockTable,
- RF_StripeNum_t stripeID,
- RF_LockReqDesc_t * lockReqDesc)
-{
- RF_StripeLockDesc_t *lockDesc, *ld_t;
- RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t;
- RF_IoType_t type = lockReqDesc->type;
- int tid = 0, hashval = HASH_STRIPEID(stripeID);
- int release_it, consider_it;
- RF_LockReqDesc_t *candidate, *candidate_t, *predecessor;
-
- RF_ASSERT(RF_IO_IS_R_OR_W(type));
-
- if (rf_stripeLockDebug) {
- if (stripeID == -1)
- Dprintf1("[%d] Lock release supressed (stripeID == -1)\n", tid);
- else {
- Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
- tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable);
- FLUSH;
- }
- }
- if (stripeID == -1)
- return;
-
- RF_LOCK_MUTEX(lockTable[hashval].mutex);
-
- /* find the stripe lock descriptor */
- for (ld_t = NULL, lockDesc = lockTable[hashval].descList; lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) {
- if (lockDesc->stripeID == stripeID)
- break;
- }
- RF_ASSERT(lockDesc); /* major error to release a lock that doesn't
- * exist */
-
- /* find the stripe lock request descriptor & delete it from the list */
- for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next)
- if (lr == lockReqDesc)
- break;
-
- RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a
- * lock that hasn't been
- * granted */
- if (lr_t)
- lr_t->next = lr->next;
- else {
- RF_ASSERT(lr == lockDesc->granted);
- lockDesc->granted = lr->next;
- }
- lr->next = NULL;
-
- if (lockReqDesc->type == RF_IO_TYPE_WRITE)
- lockDesc->nWriters--;
-
- /* search through the waiters list to see if anyone needs to be woken
- * up. for each such descriptor in the wait list, we check it against
- * everything granted and against everything _in front_ of it in the
- * waiters queue. If it conflicts with none of these, we release it.
- *
- * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST HERE.
- * This will roach the case where the callback tries to acquire a new
- * lock in the same stripe. There are some asserts to try and detect
- * this.
- *
- * We apply 2 performance optimizations: (1) if releasing this lock
- * results in no more writers to this stripe, we just release
- * everybody waiting, since we place no restrictions on the number of
- * concurrent reads. (2) we consider as candidates for wakeup only
- * those waiters that have a range overlap with either the descriptor
- * being woken up or with something in the callbacklist (i.e.
- * something we've just now woken up). This allows us to avoid the
- * long evaluation for some descriptors. */
-
- callbacklist = NULL;
- if (lockDesc->nWriters == 0) { /* performance tweak (1) */
- while (lockDesc->waitersH) {
-
- lr = lockDesc->waitersH; /* delete from waiters
- * list */
- lockDesc->waitersH = lr->next;
-
- RF_ASSERT(lr->type == RF_IO_TYPE_READ);
-
- lr->next = lockDesc->granted; /* add to granted list */
- lockDesc->granted = lr;
-
- RF_ASSERT(!lr->templink);
- lr->templink = callbacklist; /* put on callback list
- * so that we'll invoke
- * callback below */
- callbacklist = lr;
- if (rf_stripeLockDebug) {
- Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
- tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable);
- FLUSH;
- }
- }
- lockDesc->waitersT = NULL; /* we've purged the whole
- * waiters list */
-
- } else
- for (candidate_t = NULL, candidate = lockDesc->waitersH; candidate;) {
-
- /* performance tweak (2) */
- consider_it = 0;
- if (RANGE_OVERLAP(lockReqDesc, candidate))
- consider_it = 1;
- else
- for (t = callbacklist; t; t = t->templink)
- if (RANGE_OVERLAP(t, candidate)) {
- consider_it = 1;
- break;
- }
- if (!consider_it) {
- if (rf_stripeLockDebug) {
- Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
- tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
- (unsigned long) lockTable);
- FLUSH;
- }
- candidate_t = candidate;
- candidate = candidate->next;
- continue;
- }
- /* we have a candidate for release. check to make
- * sure it is not blocked by any granted locks */
- release_it = 1;
- for (predecessor = lockDesc->granted; predecessor; predecessor = predecessor->next) {
- if (STRIPELOCK_CONFLICT(candidate, predecessor)) {
- if (rf_stripeLockDebug) {
- Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
- tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
- (unsigned long) lockTable);
- FLUSH;
- }
- release_it = 0;
- break;
- }
- }
-
- /* now check to see if the candidate is blocked by any
- * waiters that occur before it it the wait queue */
- if (release_it)
- for (predecessor = lockDesc->waitersH; predecessor != candidate; predecessor = predecessor->next) {
- if (STRIPELOCK_CONFLICT(candidate, predecessor)) {
- if (rf_stripeLockDebug) {
- Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
- tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
- (unsigned long) lockTable);
- FLUSH;
- }
- release_it = 0;
- break;
- }
- }
-
- /* release it if indicated */
- if (release_it) {
- if (rf_stripeLockDebug) {
- Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n",
- tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2,
- (unsigned long) lockTable);
- FLUSH;
- }
- if (candidate_t) {
- candidate_t->next = candidate->next;
- if (lockDesc->waitersT == candidate)
- lockDesc->waitersT = candidate_t; /* cannot be waitersH
- * since candidate_t is
- * not NULL */
- } else {
- RF_ASSERT(candidate == lockDesc->waitersH);
- lockDesc->waitersH = lockDesc->waitersH->next;
- if (!lockDesc->waitersH)
- lockDesc->waitersT = NULL;
- }
- candidate->next = lockDesc->granted; /* move it to the
- * granted list */
- lockDesc->granted = candidate;
-
- RF_ASSERT(!candidate->templink);
- candidate->templink = callbacklist; /* put it on the list of
- * things to be called
- * after we release the
- * mutex */
- callbacklist = candidate;
-
- if (!candidate_t)
- candidate = lockDesc->waitersH;
- else
- candidate = candidate_t->next; /* continue with the
- * rest of the list */
- } else {
- candidate_t = candidate;
- candidate = candidate->next; /* continue with the
- * rest of the list */
- }
- }
-
- /* delete the descriptor if no one is waiting or active */
- if (!lockDesc->granted && !lockDesc->waitersH) {
- RF_ASSERT(lockDesc->nWriters == 0);
- if (rf_stripeLockDebug) {
- Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n", tid, (unsigned long) lockTable, stripeID);
- FLUSH;
- }
- if (ld_t)
- ld_t->next = lockDesc->next;
- else {
- RF_ASSERT(lockDesc == lockTable[hashval].descList);
- lockTable[hashval].descList = lockDesc->next;
- }
- FreeStripeLockDesc(lockDesc);
- lockDesc = NULL;/* only for the ASSERT below */
- }
- RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
-
- /* now that we've unlocked the mutex, invoke the callback on all the
- * descriptors in the list */
- RF_ASSERT(!((callbacklist) && (!lockDesc))); /* if we deleted the
- * descriptor, we should
- * have no callbacks to
- * do */
- for (candidate = callbacklist; candidate;) {
- t = candidate;
- candidate = candidate->templink;
- t->templink = NULL;
- (t->cbFunc) (t->cbArg);
- }
-}
-/*
- * Appends lockReqDesc to the tail of lockDesc's FIFO queue of waiters.
- * Must have the indicated lock table mutex upon entry.
- * NOTE(review): lockReqDesc->next is not reset here; presumably the caller
- * has already cleared it -- confirm.
- */
-static void
-AddToWaitersQueue(
- RF_LockTableEntry_t * lockTable,
- RF_StripeLockDesc_t * lockDesc,
- RF_LockReqDesc_t * lockReqDesc)
-{
-#if 0 /* XXX fvdl -- uninitialized use of 'tid' */
- int tid;
-
- if (rf_stripeLockDebug) {
- Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx\n", tid, lockDesc->stripeID, (unsigned long) lockTable);
- FLUSH;
- }
-#endif
- if (!lockDesc->waitersH) {
- lockDesc->waitersH = lockDesc->waitersT = lockReqDesc;
- } else {
- lockDesc->waitersT->next = lockReqDesc;
- lockDesc->waitersT = lockReqDesc;
- }
-}
-
-static RF_StripeLockDesc_t *
-AllocStripeLockDesc(RF_StripeNum_t stripeID)
-{
- RF_StripeLockDesc_t *p;
-
- RF_FREELIST_GET(rf_stripelock_freelist, p, next, (RF_StripeLockDesc_t *));
- if (p) {
- p->stripeID = stripeID;
- }
- return (p);
-}
-
-static void
-FreeStripeLockDesc(RF_StripeLockDesc_t * p)
-{
- RF_FREELIST_FREE(rf_stripelock_freelist, p, next);
-}
-
-static void
-PrintLockedStripes(lockTable)
- RF_LockTableEntry_t *lockTable;
-{
- int i, j, foundone = 0, did;
- RF_StripeLockDesc_t *p;
- RF_LockReqDesc_t *q;
-
- RF_LOCK_MUTEX(rf_printf_mutex);
- printf("Locked stripes:\n");
- for (i = 0; i < rf_lockTableSize; i++)
- if (lockTable[i].descList) {
- foundone = 1;
- for (p = lockTable[i].descList; p; p = p->next) {
- printf("Stripe ID 0x%lx (%d) nWriters %d\n",
- (long) p->stripeID, (int) p->stripeID, p->nWriters);
-
- if (!(p->granted))
- printf("Granted: (none)\n");
- else
- printf("Granted:\n");
- for (did = 1, j = 0, q = p->granted; q; j++, q = q->next) {
- printf(" %c(%ld-%ld", q->type, (long) q->start, (long) q->stop);
- if (q->start2 != -1)
- printf(",%ld-%ld) ", (long) q->start2,
- (long) q->stop2);
- else
- printf(") ");
- if (j && !(j % 4)) {
- printf("\n");
- did = 1;
- } else
- did = 0;
- }
- if (!did)
- printf("\n");
-
- if (!(p->waitersH))
- printf("Waiting: (none)\n");
- else
- printf("Waiting:\n");
- for (did = 1, j = 0, q = p->waitersH; q; j++, q = q->next) {
- printf("%c(%ld-%ld", q->type, (long) q->start, (long) q->stop);
- if (q->start2 != -1)
- printf(",%ld-%ld) ", (long) q->start2, (long) q->stop2);
- else
- printf(") ");
- if (j && !(j % 4)) {
- printf("\n ");
- did = 1;
- } else
- did = 0;
- }
- if (!did)
- printf("\n");
- }
- }
- if (!foundone)
- printf("(none)\n");
- else
- printf("\n");
- RF_UNLOCK_MUTEX(rf_printf_mutex);
-}
diff --git a/sys/dev/raidframe/rf_stripelocks.h b/sys/dev/raidframe/rf_stripelocks.h
deleted file mode 100644
index ab960c1..0000000
--- a/sys/dev/raidframe/rf_stripelocks.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_stripelocks.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*****************************************************************************
- *
- * stripelocks.h -- header file for locking stripes
- *
- * Note that these functions are called from the execution routines of certain
- * DAG Nodes, and so they must be NON-BLOCKING to assure maximum parallelism
- * in the DAG. Accordingly, when a node wants to acquire a lock, it calls
- * AcquireStripeLock, supplying a pointer to a callback function. If the lock
- * is free at the time of the call, 0 is returned, indicating that the lock
- * has been acquired. If the lock is not free, 1 is returned, and a copy of
- * the function pointer and argument are held in the lock table. When the
- * lock becomes free, the callback function is invoked.
- *
- *****************************************************************************/
-
-#ifndef _RF__RF_STRIPELOCKS_H_
-#define _RF__RF_STRIPELOCKS_H_
-
-#if defined(__FreeBSD__)
-#include <sys/types.h>
-#if __FreeBSD_version > 500005
-#include <sys/bio.h>
-#endif
-#if _KERNEL
-#include <sys/systm.h>
-#endif
-#endif
-#include <sys/buf.h>
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_general.h>
-
-struct RF_LockReqDesc_s {
- RF_IoType_t type; /* read or write */
- RF_int64 start, stop; /* start and end of range to be locked */
- RF_int64 start2, stop2; /* start and end of 2nd range to be locked */
- void (*cbFunc) (RF_Buf_t); /* callback function */
- void *cbArg; /* argument to callback function */
- RF_LockReqDesc_t *next; /* next element in chain */
- RF_LockReqDesc_t *templink; /* for making short-lived lists of
- * request descriptors */
-};
-#define RF_ASSERT_VALID_LOCKREQ(_lr_) { \
- RF_ASSERT(RF_IO_IS_R_OR_W((_lr_)->type)); \
-}
-
-struct RF_StripeLockDesc_s {
- RF_StripeNum_t stripeID;/* the stripe ID */
- RF_LockReqDesc_t *granted; /* unordered list of granted requests */
- RF_LockReqDesc_t *waitersH; /* FIFO queue of all waiting reqs,
- * both read and write (Head and Tail) */
- RF_LockReqDesc_t *waitersT;
- int nWriters; /* number of writers either granted or waiting */
- RF_StripeLockDesc_t *next; /* for hash table collision resolution */
-};
-
-struct RF_LockTableEntry_s {
- RF_DECLARE_MUTEX(mutex) /* mutex on this hash chain */
- RF_StripeLockDesc_t *descList; /* hash chain of lock descriptors */
-};
-/*
- * Initializes a lock request descriptor. _defSize is the number of sectors
- * that we lock when there is no parity information in the ASM (e.g. RAID0).
- */
-
-#define RF_INIT_LOCK_REQ_DESC(_lrd, _typ, _cbf, _cba, _asm, _defSize) \
- { \
- (_lrd).type = _typ; \
- (_lrd).start2 = -1; \
- (_lrd).stop2 = -1; \
- if ((_asm)->parityInfo) { \
- (_lrd).start = (_asm)->parityInfo->startSector; \
- (_lrd).stop = (_asm)->parityInfo->startSector + (_asm)->parityInfo->numSector-1; \
- if ((_asm)->parityInfo->next) { \
- (_lrd).start2 = (_asm)->parityInfo->next->startSector; \
- (_lrd).stop2 = (_asm)->parityInfo->next->startSector + (_asm)->parityInfo->next->numSector-1; \
- } \
- } else { \
- (_lrd).start = 0; \
- (_lrd).stop = (_defSize); \
- } \
- (_lrd).templink= NULL; \
- (_lrd).cbFunc = (_cbf); \
- (_lrd).cbArg = (void *) (_cba); \
- }
-
-int rf_ConfigureStripeLockFreeList(RF_ShutdownList_t ** listp);
-RF_LockTableEntry_t *rf_MakeLockTable(void);
-void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable);
-int
-rf_ConfigureStripeLocks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
- RF_Config_t * cfgPtr);
-int
-rf_AcquireStripeLock(RF_LockTableEntry_t * lockTable,
- RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc);
-void
-rf_ReleaseStripeLock(RF_LockTableEntry_t * lockTable,
- RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc);
-
-#endif /* !_RF__RF_STRIPELOCKS_H_ */
diff --git a/sys/dev/raidframe/rf_strutils.c b/sys/dev/raidframe/rf_strutils.c
deleted file mode 100644
index d434f0a..0000000
--- a/sys/dev/raidframe/rf_strutils.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/* $NetBSD: rf_strutils.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * rf_strutils.c
- *
- * String-parsing funcs
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/*
- * rf_strutils.c -- some simple utilities for munging on strings.
- * I put them in a file by themselves because they're needed in
- * setconfig, in the user-level driver, and in the kernel.
- *
- */
-
-#include <dev/raidframe/rf_utils.h>
-
-/* finds a non-white character in the line */
-char *
-rf_find_non_white(char *p)
-{
- for (; *p != '\0' && (*p == ' ' || *p == '\t'); p++);
- return (p);
-}
-/* finds a white character in the line */
-char *
-rf_find_white(char *p)
-{
- for (; *p != '\0' && (*p != ' ' && *p != '\t'); p++);
- return (p);
-}
diff --git a/sys/dev/raidframe/rf_threadstuff.c b/sys/dev/raidframe/rf_threadstuff.c
deleted file mode 100644
index 657ffee..0000000
--- a/sys/dev/raidframe/rf_threadstuff.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/* $NetBSD: rf_threadstuff.c,v 1.5 1999/12/07 02:13:28 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * rf_threadstuff.c
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_threadstuff.h>
-#include <dev/raidframe/rf_general.h>
-#include <dev/raidframe/rf_shutdown.h>
-
-static void mutex_destroyer(void *);
-static void cond_destroyer(void *);
-
-/*
- * Shared stuff
- */
-
-static void
-mutex_destroyer(arg)
- void *arg;
-{
- int rc;
-
- rc = rf_mutex_destroy(arg);
- if (rc) {
- RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying mutex\n", rc);
- }
-}
-
-static void
-cond_destroyer(arg)
- void *arg;
-{
- int rc;
-
- rc = rf_cond_destroy(arg);
- if (rc) {
- RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying condition\n", rc);
- }
-}
-
-int
-_rf_create_managed_mutex(listp, m, file, line)
- RF_ShutdownList_t **listp;
-RF_DECLARE_MUTEX(*m)
- char *file;
- int line;
-{
- int rc, rc1;
-
- rc = rf_mutex_init(m, __FUNCTION__);
- if (rc)
- return (rc);
- rc = _rf_ShutdownCreate(listp, mutex_destroyer, (void *) m, file, line);
- if (rc) {
- RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc);
- rc1 = rf_mutex_destroy(m);
- if (rc1) {
- RF_ERRORMSG1("RAIDFRAME: Error %d destroying mutex\n", rc1);
- }
- }
- return (rc);
-}
-
-int
-_rf_create_managed_cond(listp, c, file, line)
- RF_ShutdownList_t **listp;
-RF_DECLARE_COND(*c)
- char *file;
- int line;
-{
- int rc, rc1;
-
- rc = rf_cond_init(c);
- if (rc)
- return (rc);
- rc = _rf_ShutdownCreate(listp, cond_destroyer, (void *) c, file, line);
- if (rc) {
- RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc);
- rc1 = rf_cond_destroy(c);
- if (rc1) {
- RF_ERRORMSG1("RAIDFRAME: Error %d destroying cond\n", rc1);
- }
- }
- return (rc);
-}
-
-int
-_rf_init_managed_threadgroup(listp, g, file, line)
- RF_ShutdownList_t **listp;
- RF_ThreadGroup_t *g;
- char *file;
- int line;
-{
- int rc;
-
- rc = _rf_create_managed_mutex(listp, &g->mutex, file, line);
- if (rc)
- return (rc);
- rc = _rf_create_managed_cond(listp, &g->cond, file, line);
- if (rc)
- return (rc);
- g->created = g->running = g->shutdown = 0;
- return (0);
-}
-
-int
-_rf_destroy_threadgroup(g, file, line)
- RF_ThreadGroup_t *g;
- char *file;
- int line;
-{
- int rc1, rc2;
-
- rc1 = rf_mutex_destroy(&g->mutex);
- rc2 = rf_cond_destroy(&g->cond);
- if (rc1)
- return (rc1);
- return (rc2);
-}
-
-int
-_rf_init_threadgroup(g, file, line)
- RF_ThreadGroup_t *g;
- char *file;
- int line;
-{
- int rc;
-
- rc = rf_mutex_init(&g->mutex, __FUNCTION__);
- if (rc)
- return (rc);
- rc = rf_cond_init(&g->cond);
- if (rc) {
- rf_mutex_destroy(&g->mutex);
- return (rc);
- }
- g->created = g->running = g->shutdown = 0;
- return (0);
-}
-
-
-/*
- * Kernel
- */
-#if defined(__FreeBSD__) && __FreeBSD_version > 500005
-int
-rf_mutex_init(m, s)
-decl_simple_lock_data(, *m)
-const char *s;
-{
- mtx_init(m, s, NULL, MTX_DEF);
- return (0);
-}
-
-int
-rf_mutex_destroy(m)
-decl_simple_lock_data(, *m)
-{
- mtx_destroy(m);
- return (0);
-}
-#else
-int
-rf_mutex_init(m, s)
-decl_simple_lock_data(, *m)
-const char *s;
-{
- simple_lock_init(m);
- return (0);
-}
-
-int
-rf_mutex_destroy(m)
-decl_simple_lock_data(, *m)
-{
- return (0);
-}
-#endif
-
-int
-rf_cond_init(c)
-RF_DECLARE_COND(*c)
-{
- *c = 0; /* no reason */
- return (0);
-}
-
-int
-rf_cond_destroy(c)
-RF_DECLARE_COND(*c)
-{
- return (0);
-}
diff --git a/sys/dev/raidframe/rf_threadstuff.h b/sys/dev/raidframe/rf_threadstuff.h
deleted file mode 100644
index a3560cc..0000000
--- a/sys/dev/raidframe/rf_threadstuff.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_threadstuff.h,v 1.10 2001/01/27 20:42:21 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland, Daniel Stodolsky, Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * threadstuff.h -- definitions for threads, locks, and synchronization
- *
- * The purpose of this file is to provide some illusion of portability.
- * If the functions below can be implemented with the same semantics on
- * some new system, then at least the synchronization and thread control
- * part of the code should not require modification to port to a new machine.
- * the only other place where the pthread package is explicitly used is
- * threadid.h
- *
- * this file should be included above stdio.h to get some necessary defines.
- *
- */
-
-#ifndef _RF__RF_THREADSTUFF_H_
-#define _RF__RF_THREADSTUFF_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <sys/types.h>
-#include <sys/param.h>
-#ifdef _KERNEL
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/kthread.h>
-#endif
-
-#define rf_create_managed_mutex(a,b) _rf_create_managed_mutex(a,b,__FILE__,__LINE__)
-#define rf_create_managed_cond(a,b) _rf_create_managed_cond(a,b,__FILE__,__LINE__)
-#define rf_init_managed_threadgroup(a,b) _rf_init_managed_threadgroup(a,b,__FILE__,__LINE__)
-#define rf_init_threadgroup(a) _rf_init_threadgroup(a,__FILE__,__LINE__)
-#define rf_destroy_threadgroup(a) _rf_destroy_threadgroup(a,__FILE__,__LINE__)
-
-int _rf_init_threadgroup(RF_ThreadGroup_t * g, char *file, int line);
-int _rf_destroy_threadgroup(RF_ThreadGroup_t * g, char *file, int line);
-int
-_rf_init_managed_threadgroup(RF_ShutdownList_t ** listp,
- RF_ThreadGroup_t * g, char *file, int line);
-
-#include <sys/lock.h>
-#if defined(__FreeBSD__ ) && __FreeBSD_version > 500005
-#include <sys/mutex.h>
-#define decl_simple_lock_data(a,b) a struct mtx b;
-#define simple_lock_addr(a) ((struct mtx *)&(a))
-
-typedef struct thread *RF_Thread_t;
-typedef void *RF_ThreadArg_t;
-
-#ifdef _KERNEL
-static __inline struct ucred *
-rf_getucred(RF_Thread_t td)
-{
- return (((struct thread *)td)->td_ucred);
-}
-#endif
-
-#define RF_LOCK_MUTEX(_m_) mtx_lock(&(_m_))
-#define RF_UNLOCK_MUTEX(_m_) mtx_unlock(&(_m_))
-#else
-#define decl_simple_lock_data(a,b) a struct simplelock b;
-#define simple_lock_addr(a) ((struct simplelock *)&(a))
-
-typedef struct proc *RF_Thread_t;
-typedef void *RF_ThreadArg_t;
-
-static __inline struct ucred *
-rf_getucred(RF_Thread_t td)
-{
- return (((struct proc *)td)->p_ucred);
-}
-
-#define RF_LOCK_MUTEX(_m_) simple_lock(&(_m_))
-#define RF_UNLOCK_MUTEX(_m_) simple_unlock(&(_m_))
-#endif
-
-#define RF_DECLARE_MUTEX(_m_) decl_simple_lock_data(,(_m_))
-#define RF_DECLARE_STATIC_MUTEX(_m_) decl_simple_lock_data(static,(_m_))
-#define RF_DECLARE_EXTERN_MUTEX(_m_) decl_simple_lock_data(extern,(_m_))
-
-#define RF_DECLARE_COND(_c_) int _c_;
-#define RF_DECLARE_STATIC_COND(_c_) static int _c_;
-#define RF_DECLARE_EXTERN_COND(_c_) extern int _c_;
-
-/*
- * In NetBSD, kernel threads are simply processes which share several
- * substructures and never run in userspace.
- */
-#define RF_WAIT_COND(_c_,_m_) \
- RF_LTSLEEP(&(_c_), PRIBIO, "rfwcond", 0, &(_m_))
-#define RF_SIGNAL_COND(_c_) wakeup_one(&(_c_))
-#define RF_BROADCAST_COND(_c_) wakeup(&(_c_))
-#if defined(__NetBSD__)
-#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \
- kthread_create1((void (*)(void *))(_func_), (void *)(_arg_), \
- (struct proc **)&(_handle_), _name_)
-#define RF_THREAD_EXIT(ret) \
- kthread_exit(ret)
-#elif defined(__FreeBSD__)
-#if __FreeBSD_version > 500005
-#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \
- kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \
- (struct proc **)&(_handle_), 0, 4, _name_)
-#define RF_THREAD_EXIT(ret) \
- kthread_exit(ret)
-#else
-#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \
- kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \
- (struct proc **)&(_handle_), _name_)
-/*
- * Hands `ret' to kthread_exit(). Expands to a bare expression, like the
- * other RF_THREAD_EXIT definitions, so the caller supplies the terminating
- * ';' and the macro stays usable in an unbraced if/else.
- */
-#define RF_THREAD_EXIT(ret) \
- kthread_exit(ret)
-#endif
-#endif
-
-struct RF_ThreadGroup_s {
- int created;
- int running;
- int shutdown;
- RF_DECLARE_MUTEX(mutex)
- RF_DECLARE_COND(cond)
-};
-/*
- * Someone has started a thread in the group
- */
-#define RF_THREADGROUP_STARTED(_g_) { \
- RF_LOCK_MUTEX((_g_)->mutex); \
- (_g_)->created++; \
- RF_UNLOCK_MUTEX((_g_)->mutex); \
-}
-
-/*
- * Thread announcing that it is now running
- */
-#define RF_THREADGROUP_RUNNING(_g_) { \
- RF_LOCK_MUTEX((_g_)->mutex); \
- (_g_)->running++; \
- RF_UNLOCK_MUTEX((_g_)->mutex); \
- RF_SIGNAL_COND((_g_)->cond); \
-}
-
-/*
- * Thread announcing that it is now done
- */
-#define RF_THREADGROUP_DONE(_g_) { \
- RF_LOCK_MUTEX((_g_)->mutex); \
- (_g_)->shutdown++; \
- RF_UNLOCK_MUTEX((_g_)->mutex); \
- RF_SIGNAL_COND((_g_)->cond); \
-}
-
-/*
- * Wait for all threads to start running
- */
-#define RF_THREADGROUP_WAIT_START(_g_) { \
- RF_LOCK_MUTEX((_g_)->mutex); \
- while((_g_)->running < (_g_)->created) { \
- RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \
- } \
- RF_UNLOCK_MUTEX((_g_)->mutex); \
-}
-
-/*
- * Wait for all threads to stop running
- */
-#ifndef __NetBSD__
-#define RF_THREADGROUP_WAIT_STOP(_g_) { \
- RF_LOCK_MUTEX((_g_)->mutex); \
- RF_ASSERT((_g_)->running == (_g_)->created); \
- while((_g_)->shutdown < (_g_)->running) { \
- RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \
- } \
- RF_UNLOCK_MUTEX((_g_)->mutex); \
-}
-#else
- /* XXX Note that we've removed the assert. That should get put back in once
- * we actually get something like a kernel thread running */
-#define RF_THREADGROUP_WAIT_STOP(_g_) { \
- RF_LOCK_MUTEX((_g_)->mutex); \
- while((_g_)->shutdown < (_g_)->running) { \
- RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \
- } \
- RF_UNLOCK_MUTEX((_g_)->mutex); \
-}
-#endif
-
-#if defined(__FreeBSD__) && __FreeBSD_version > 500005
-int rf_mutex_init(struct mtx *, const char *);
-int rf_mutex_destroy(struct mtx *);
-int _rf_create_managed_mutex(RF_ShutdownList_t **, struct mtx *,
- char *, int);
-#else
-int rf_mutex_init(struct simplelock *, const char *);
-int rf_mutex_destroy(struct simplelock *);
-int _rf_create_managed_mutex(RF_ShutdownList_t **, struct simplelock *,
- char *, int);
-#endif
-int _rf_create_managed_cond(RF_ShutdownList_t ** listp, int *,
- char *file, int line);
-
-int rf_cond_init(int *c);
-int rf_cond_destroy(int *c);
-#endif /* !_RF__RF_THREADSTUFF_H_ */
diff --git a/sys/dev/raidframe/rf_types.h b/sys/dev/raidframe/rf_types.h
deleted file mode 100644
index 25630ef..0000000
--- a/sys/dev/raidframe/rf_types.h
+++ /dev/null
@@ -1,247 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_types.h,v 1.6 1999/09/05 03:05:55 oster Exp $ */
-/*
- * rf_types.h
- */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Jim Zelenka
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-/***********************************************************
- *
- * rf_types.h -- standard types for RAIDframe
- *
- ***********************************************************/
-
-#ifndef _RF__RF_TYPES_H_
-#define _RF__RF_TYPES_H_
-
-
-#include <dev/raidframe/rf_archs.h>
-
-#include <sys/errno.h>
-#include <sys/types.h>
-
-#include <sys/uio.h>
-#include <sys/param.h>
-#ifdef _KERNEL
-#include <sys/lock.h>
-#endif
-
-/*
- * First, define system-dependent types and constants.
- *
- * If the machine is big-endian, RF_IS_BIG_ENDIAN should be 1.
- * Otherwise, it should be 0.
- *
- * The various integer types should be self-explanatory; we
- * use these elsewhere to avoid size confusion.
- *
- * RF_LONGSHIFT is lg(sizeof(long)) (that is, log base two of sizeof(long)).
- *
- */
-
-#include <sys/types.h>
-#include <sys/limits.h>
-#include <machine/endian.h>
-
-#if BYTE_ORDER == BIG_ENDIAN
-#define RF_IS_BIG_ENDIAN 1
-#elif BYTE_ORDER == LITTLE_ENDIAN
-#define RF_IS_BIG_ENDIAN 0
-#else
-#error byte order not defined
-#endif
-typedef int8_t RF_int8;
-typedef u_int8_t RF_uint8;
-typedef int16_t RF_int16;
-typedef u_int16_t RF_uint16;
-typedef int32_t RF_int32;
-typedef u_int32_t RF_uint32;
-typedef int64_t RF_int64;
-typedef u_int64_t RF_uint64;
-#if LONG_BIT == 32
-#define RF_LONGSHIFT 2
-#elif LONG_BIT == 64
-#define RF_LONGSHIFT 3
-#elif defined(__i386__)
-#define RF_LONGSHIFT 2
-#elif defined(__alpha__)
-#define RF_LONGSHIFT 3
-#else
-#error word size not defined
-#endif
-
-/*
- * These are just zero and non-zero. We don't use "TRUE"
- * and "FALSE" because there's too much nonsense trying
- * to get them defined exactly once on every platform, given
- * the different places they may be defined in system header
- * files.
- */
-#define RF_TRUE 1
-#define RF_FALSE 0
-
-/*
- * Now, some generic types
- */
-typedef RF_uint64 RF_IoCount_t;
-typedef RF_uint64 RF_Offset_t;
-typedef RF_uint32 RF_PSSFlags_t;
-typedef RF_uint64 RF_SectorCount_t;
-typedef RF_uint64 RF_StripeCount_t;
-typedef RF_int64 RF_SectorNum_t;/* these are signed so we can set them to
- * (-1) for "uninitialized" */
-typedef RF_int64 RF_StripeNum_t;
-typedef RF_int64 RF_RaidAddr_t;
-typedef int RF_RowCol_t; /* signed so it can be (-1) */
-typedef RF_int64 RF_HeadSepLimit_t;
-typedef RF_int64 RF_ReconUnitCount_t;
-typedef int RF_ReconUnitNum_t;
-
-typedef char RF_ParityConfig_t;
-
-typedef char RF_DiskQueueType_t[1024];
-#define RF_DISK_QUEUE_TYPE_NONE ""
-
-/* values for the 'type' field in a reconstruction buffer */
-typedef int RF_RbufType_t;
-#define RF_RBUF_TYPE_EXCLUSIVE 0 /* this buf assigned exclusively to
- * one disk */
-#define RF_RBUF_TYPE_FLOATING 1 /* this is a floating recon buf */
-#define RF_RBUF_TYPE_FORCED 2 /* this rbuf was allocated to complete
- * a forced recon */
-
-typedef char RF_IoType_t;
-#define RF_IO_TYPE_READ 'r'
-#define RF_IO_TYPE_WRITE 'w'
-#define RF_IO_TYPE_NOP 'n'
-#define RF_IO_IS_R_OR_W(_type_) (((_type_) == RF_IO_TYPE_READ) \
- || ((_type_) == RF_IO_TYPE_WRITE))
-
-typedef void (*RF_VoidFuncPtr) (void *,...);
-
-typedef RF_uint32 RF_AccessStripeMapFlags_t;
-typedef RF_uint32 RF_DiskQueueDataFlags_t;
-typedef RF_uint32 RF_DiskQueueFlags_t;
-typedef RF_uint32 RF_RaidAccessFlags_t;
-
-#define RF_DISKQUEUE_DATA_FLAGS_NONE ((RF_DiskQueueDataFlags_t)0)
-
-typedef struct RF_AccessStripeMap_s RF_AccessStripeMap_t;
-typedef struct RF_AccessStripeMapHeader_s RF_AccessStripeMapHeader_t;
-typedef struct RF_AllocListElem_s RF_AllocListElem_t;
-typedef struct RF_CallbackDesc_s RF_CallbackDesc_t;
-typedef struct RF_ChunkDesc_s RF_ChunkDesc_t;
-typedef struct RF_CommonLogData_s RF_CommonLogData_t;
-typedef struct RF_Config_s RF_Config_t;
-typedef struct RF_CumulativeStats_s RF_CumulativeStats_t;
-typedef struct RF_DagHeader_s RF_DagHeader_t;
-typedef struct RF_DagList_s RF_DagList_t;
-typedef struct RF_DagNode_s RF_DagNode_t;
-typedef struct RF_DeclusteredConfigInfo_s RF_DeclusteredConfigInfo_t;
-typedef struct RF_DiskId_s RF_DiskId_t;
-typedef struct RF_DiskMap_s RF_DiskMap_t;
-typedef struct RF_DiskQueue_s RF_DiskQueue_t;
-typedef struct RF_DiskQueueData_s RF_DiskQueueData_t;
-typedef struct RF_DiskQueueSW_s RF_DiskQueueSW_t;
-typedef struct RF_Etimer_s RF_Etimer_t;
-typedef struct RF_EventCreate_s RF_EventCreate_t;
-typedef struct RF_FreeList_s RF_FreeList_t;
-typedef struct RF_LockReqDesc_s RF_LockReqDesc_t;
-typedef struct RF_LockTableEntry_s RF_LockTableEntry_t;
-typedef struct RF_MCPair_s RF_MCPair_t;
-typedef struct RF_OwnerInfo_s RF_OwnerInfo_t;
-typedef struct RF_ParityLog_s RF_ParityLog_t;
-typedef struct RF_ParityLogAppendQueue_s RF_ParityLogAppendQueue_t;
-typedef struct RF_ParityLogData_s RF_ParityLogData_t;
-typedef struct RF_ParityLogDiskQueue_s RF_ParityLogDiskQueue_t;
-typedef struct RF_ParityLogQueue_s RF_ParityLogQueue_t;
-typedef struct RF_ParityLogRecord_s RF_ParityLogRecord_t;
-typedef struct RF_PerDiskReconCtrl_s RF_PerDiskReconCtrl_t;
-typedef struct RF_PSStatusHeader_s RF_PSStatusHeader_t;
-typedef struct RF_PhysDiskAddr_s RF_PhysDiskAddr_t;
-typedef struct RF_PropHeader_s RF_PropHeader_t;
-typedef struct RF_Raid_s RF_Raid_t;
-typedef struct RF_RaidAccessDesc_s RF_RaidAccessDesc_t;
-typedef struct RF_RaidDisk_s RF_RaidDisk_t;
-typedef struct RF_RaidLayout_s RF_RaidLayout_t;
-typedef struct RF_RaidReconDesc_s RF_RaidReconDesc_t;
-typedef struct RF_ReconBuffer_s RF_ReconBuffer_t;
-typedef struct RF_ReconConfig_s RF_ReconConfig_t;
-typedef struct RF_ReconCtrl_s RF_ReconCtrl_t;
-typedef struct RF_ReconDoneProc_s RF_ReconDoneProc_t;
-typedef struct RF_ReconEvent_s RF_ReconEvent_t;
-typedef struct RF_ReconMap_s RF_ReconMap_t;
-typedef struct RF_ReconMapListElem_s RF_ReconMapListElem_t;
-typedef struct RF_ReconParityStripeStatus_s RF_ReconParityStripeStatus_t;
-typedef struct RF_RedFuncs_s RF_RedFuncs_t;
-typedef struct RF_RegionBufferQueue_s RF_RegionBufferQueue_t;
-typedef struct RF_RegionInfo_s RF_RegionInfo_t;
-typedef struct RF_ShutdownList_s RF_ShutdownList_t;
-typedef struct RF_SpareTableEntry_s RF_SpareTableEntry_t;
-typedef struct RF_SparetWait_s RF_SparetWait_t;
-typedef struct RF_StripeLockDesc_s RF_StripeLockDesc_t;
-typedef struct RF_ThreadGroup_s RF_ThreadGroup_t;
-typedef struct RF_ThroughputStats_s RF_ThroughputStats_t;
-
-/*
- * Important assumptions regarding ordering of the states in this list
- * have been made!!!
- * Before disturbing this ordering, look at code in rf_states.c
- */
-typedef enum RF_AccessState_e {
- /* original states */
- rf_QuiesceState, /* handles quiescence for reconstruction */
- rf_IncrAccessesCountState, /* count accesses in flight */
- rf_DecrAccessesCountState,
- rf_MapState, /* map access to disk addresses */
- rf_LockState, /* take stripe locks */
- rf_CreateDAGState, /* create DAGs */
- rf_ExecuteDAGState, /* execute DAGs */
- rf_ProcessDAGState, /* DAGs are completing- check if correct, or
- * if we need to retry */
- rf_CleanupState, /* release stripe locks, clean up */
- rf_LastState /* must be the last state */
-} RF_AccessState_t;
-#define RF_MAXROW 10 /* these are arbitrary and can be modified at
- * will */
-#define RF_MAXCOL 40
-#define RF_MAXSPARE 10
-#define RF_MAXDBGV 75 /* max number of debug variables */
-
-union RF_GenericParam_u {
- void *p;
- RF_uint64 v;
-};
-typedef union RF_GenericParam_u RF_DagParam_t;
-typedef union RF_GenericParam_u RF_CBParam_t;
-
-#if defined(__FreeBSD__) && __FreeBSD_version > 500005
-typedef struct bio *RF_Buf_t;
-#else
-typedef struct buf *RF_Buf_t;
-#endif
-#endif /* _RF__RF_TYPES_H_ */
diff --git a/sys/dev/raidframe/rf_utils.c b/sys/dev/raidframe/rf_utils.c
deleted file mode 100644
index 71f7b93..0000000
--- a/sys/dev/raidframe/rf_utils.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/* $NetBSD: rf_utils.c,v 1.5 2000/01/07 03:41:03 oster Exp $ */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/****************************************
- *
- * rf_utils.c -- various support routines
- *
- ****************************************/
-
-
-#include <dev/raidframe/rf_threadstuff.h>
-
-#include <sys/time.h>
-
-#include <dev/raidframe/rf_utils.h>
-#include <dev/raidframe/rf_debugMem.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_general.h>
-
-/*
- * Build a zero-filled table of b rows by k columns.  The row-pointer
- * vector and every row are obtained through RF_MallocAndAdd() with
- * allocList; each row is cleared with bzero() right after it is
- * obtained.  (MCH)
- */
-RF_RowCol_t **
-rf_make_2d_array(int b, int k, RF_AllocListElem_t *allocList)
-{
-	RF_RowCol_t **table;
-	RF_RowCol_t row;
-
-	RF_MallocAndAdd(table, b * sizeof(RF_RowCol_t *), (RF_RowCol_t **), allocList);
-	row = 0;
-	while (row < b) {
-		RF_MallocAndAdd(table[row], k * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList);
-		(void) bzero((char *) table[row], k * sizeof(RF_RowCol_t));
-		row++;
-	}
-	return (table);
-}
-
-/*
- * Release a b-row table: each k-column row is passed to RF_Free() first,
- * then the row-pointer vector itself.
- *
- * The vector is an array of b pointers, so the size reported to RF_Free()
- * for it is b * sizeof(RF_RowCol_t *) -- the same expression
- * rf_make_2d_array() uses when it requests the vector.
- */
-void
-rf_free_2d_array(RF_RowCol_t **a, int b, int k)
-{
-	RF_RowCol_t i;
-
-	for (i = 0; i < b; i++) {
-		RF_Free(a[i], k * sizeof(RF_RowCol_t));
-	}
-	RF_Free(a, b * sizeof(RF_RowCol_t *));
-}
-
-
-/*
- * Obtain a c-element RF_RowCol_t vector through RF_MallocAndAdd() with
- * allocList, clear it with bzero() and hand it back.
- */
-RF_RowCol_t *
-rf_make_1d_array(int c, RF_AllocListElem_t *allocList)
-{
-	RF_RowCol_t *vec;
-
-	RF_MallocAndAdd(vec, c * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList);
-	(void) bzero((char *) vec, c * sizeof(RF_RowCol_t));
-
-	return (vec);
-}
-
-/*
- * Hand the n-element vector a to RF_Free(), reporting its size as
- * n * sizeof(RF_RowCol_t).
- */
-void
-rf_free_1d_array(RF_RowCol_t *a, int n)
-{
-	RF_Free(a, n * sizeof(RF_RowCol_t));
-}
-/*
- * Euclid's algorithm: returns the greatest common divisor of m and n.
- * The reduction only runs while m > 0, so a call with m <= 0 returns n
- * unchanged.  (MCH)
- */
-int
-rf_gcd(int m, int n)
-{
-	int rem;
-
-	for (; m > 0; m = rem) {
-		rem = n % m;
-		n = m;
-	}
-	return (n);
-}
-/*
- * These convert between text and integer.  Apparently the regular C macros
- * for doing this are not available in the kernel.
- *
- * Every macro below expands its argument more than once, so the argument
- * must be free of side effects.
- */
-
-/* true for '0'..'9' */
-#define ISDIGIT(x) ( (x) >= '0' && (x) <= '9' )
-/* true for the hex LETTERS only ('a'..'f', 'A'..'F'); decimal digits do not match */
-#define ISHEXCHAR(x) ( ((x) >= 'a' && (x) <= 'f') || ((x) >= 'A' && (x) <= 'F') )
-/* true for any hexadecimal digit, decimal or letter */
-#define ISHEX(x) ( ISDIGIT(x) || ISHEXCHAR(x) )
-/*
- * Value of hex digit x.  Anything that is not a hex letter falls through
- * to (x) - '0', so the result is only meaningful when ISHEX(x) holds.
- */
-#define HC2INT(x) ( ((x) >= 'a' && (x) <= 'f') ? (x) - 'a' + 10 : \
-	( ((x) >= 'A' && (x) <= 'F') ? (x) - 'A' + 10 : ((x) - '0') ) )
-
-/*
- * Convert a decimal string to an int.  One leading '-' negates the
- * result; conversion stops at the first character outside '0'..'9'.
- * Leading whitespace and '+' are not skipped, and there is no overflow
- * check.
- */
-int
-rf_atoi(char *p)
-{
-	int total = 0;
-	int negative = 0;
-
-	if (*p == '-') {
-		negative = 1;
-		p++;
-	}
-	while (*p >= '0' && *p <= '9') {
-		total = 10 * total + (*p - '0');
-		p++;
-	}
-	if (negative)
-		return (-total);
-	return (total);
-}
-
-/*
- * Convert a hexadecimal string to an int.  Digits '0'..'9' and letters
- * 'a'..'f' / 'A'..'F' are consumed; conversion stops at the first other
- * character.  There is no "0x" prefix handling, no sign and no overflow
- * check.
- */
-int
-rf_htoi(char *p)
-{
-	int val = 0;
-
-	/*
-	 * Test with ISHEX, not ISHEXCHAR: ISHEXCHAR matches letters only,
-	 * so a string such as "1f" would stop at '1' and yield 0.
-	 */
-	for (; ISHEX(*p); p++)
-		val = 16 * val + HC2INT(*p);
-	return (val);
-}
diff --git a/sys/dev/raidframe/rf_utils.h b/sys/dev/raidframe/rf_utils.h
deleted file mode 100644
index 18eac84..0000000
--- a/sys/dev/raidframe/rf_utils.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* $FreeBSD$ */
-/* $NetBSD: rf_utils.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */
-/*
- * Copyright (c) 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Mark Holland
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/***************************************
- *
- * rf_utils.h -- header file for rf_utils.c
- *
- ***************************************/
-
-
-#ifndef _RF__RF_UTILS_H_
-#define _RF__RF_UTILS_H_
-
-#include <dev/raidframe/rf_types.h>
-#include <dev/raidframe/rf_alloclist.h>
-#include <dev/raidframe/rf_threadstuff.h>
-
-/*
- * rf_find_non_white() and rf_find_white() are declared here but are not
- * defined in rf_utils.c.
- * NOTE(review): presumably they live in another raidframe source file --
- * confirm before relying on this header for them.
- */
-char *rf_find_non_white(char *p);
-char *rf_find_white(char *p);
-/* zero-filled RF_RowCol_t array constructors and the matching release routines */
-RF_RowCol_t **rf_make_2d_array(int b, int k, RF_AllocListElem_t * allocList);
-RF_RowCol_t *rf_make_1d_array(int c, RF_AllocListElem_t * allocList);
-void rf_free_2d_array(RF_RowCol_t ** a, int b, int k);
-void rf_free_1d_array(RF_RowCol_t * a, int n);
-/* greatest common divisor (Euclid's algorithm) */
-int rf_gcd(int m, int n);
-/* text-to-integer conversion: decimal and hexadecimal respectively */
-int rf_atoi(char *p);
-int rf_htoi(char *p);
-
-/* microseconds in one second */
-#define RF_USEC_PER_SEC 1000000
-/*
- * Total microseconds in _t_.  _t_ is accessed with '.', so pass the
- * structure itself, not a pointer to it.
- * NOTE(review): the arithmetic is done in whatever type tv_sec has; check
- * for overflow on platforms where that type is narrow.
- */
-#define RF_TIMEVAL_TO_US(_t_) (((_t_).tv_sec) \
- * RF_USEC_PER_SEC + (_t_).tv_usec)
-
-/*
- * Store (*_end_ - *_start_) into *_diff_.  All three arguments are
- * accessed with '->', so pass pointers.  When the end tv_usec is smaller
- * than the start tv_usec, one second is borrowed from tv_sec.
- *
- * The expansion is a bare { } block, not do { } while (0): written as
- * "if (c) RF_TIMEVAL_DIFF(a, b, d); else ..." the trailing ';' ends the
- * if statement and the else fails to compile.  Each argument is expanded
- * several times, so arguments must have no side effects.
- */
-#define RF_TIMEVAL_DIFF(_start_,_end_,_diff_) { \
- if ((_end_)->tv_usec < (_start_)->tv_usec) { \
- (_diff_)->tv_usec = ((_end_)->tv_usec + RF_USEC_PER_SEC) \
- - (_start_)->tv_usec; \
- (_diff_)->tv_sec = ((_end_)->tv_sec-1) - (_start_)->tv_sec; \
- } \
- else { \
- (_diff_)->tv_usec = (_end_)->tv_usec - (_start_)->tv_usec; \
- (_diff_)->tv_sec = (_end_)->tv_sec - (_start_)->tv_sec; \
- } \
-}
-
-#endif /* !_RF__RF_UTILS_H_ */
diff --git a/sys/modules/raidframe/Makefile b/sys/modules/raidframe/Makefile
deleted file mode 100644
index 74256f1..0000000
--- a/sys/modules/raidframe/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-# $FreeBSD$
-
-# Sources are looked up in ../../dev/raidframe relative to this directory.
-.PATH: ${.CURDIR}/../../dev/raidframe
-
-# Name of the kernel module to build.
-KMOD= raidframe
-# Besides the rf_*.c sources, SRCS lists two headers: opt_raid.h, which is
-# produced by the opt_raid.h rule in this Makefile, and vnode_if.h.
-# NOTE(review): vnode_if.h is presumably generated by the kmod build
-# machinery -- confirm.
-SRCS= rf_acctrace.c rf_alloclist.c rf_aselect.c rf_callback.c \
- rf_chaindecluster.c rf_copyback.c rf_cvscan.c rf_dagdegrd.c \
- rf_dagdegwr.c rf_dagffrd.c rf_dagffwr.c rf_dagfuncs.c rf_dagutils.c \
- rf_debugMem.c rf_debugprint.c rf_decluster.c rf_declusterPQ.c \
- rf_diskqueue.c rf_disks.c rf_driver.c rf_engine.c rf_evenodd.c \
- rf_evenodd_dagfuncs.c rf_evenodd_dags.c rf_fifo.c rf_interdecluster.c \
- rf_invertq.c rf_layout.c rf_map.c rf_mcpair.c rf_memchunk.c \
- rf_nwayxor.c rf_options.c rf_paritylog.c rf_paritylogDiskMgr.c \
- rf_paritylogging.c rf_parityloggingdags.c rf_parityscan.c rf_pq.c \
- rf_pqdeg.c rf_pqdegdags.c rf_psstatus.c rf_raid0.c rf_raid1.c \
- rf_raid4.c rf_raid5.c rf_raid5_rotatedspare.c rf_reconbuffer.c \
- rf_reconmap.c rf_reconstruct.c rf_reconutil.c rf_revent.c \
- rf_shutdown.c rf_sstf.c rf_states.c rf_stripelocks.c rf_strutils.c \
- rf_threadstuff.c rf_utils.c rf_freebsdkintf.c \
- opt_raid.h vnode_if.h
-# Defaults for the two build knobs; ?= assigns only when the variable is
-# not already set, so either can be overridden by the caller.
-RF_AUTOCONFIG?= 1
-RF_DEBUG?= 0
-
-# Generate opt_raid.h from the knobs above: the first echo creates or
-# truncates the target with RAID_AUTOCONFIG, the second appends RAID_DEBUG.
-opt_raid.h:
- echo "#define RAID_AUTOCONFIG ${RF_AUTOCONFIG}" > ${.TARGET}
- echo "#define RAID_DEBUG ${RF_DEBUG}" >> ${.TARGET}
-
-.include <bsd.kmod.mk>
OpenPOWER on IntegriCloud