diff options
author | scottl <scottl@FreeBSD.org> | 2004-03-16 12:23:43 +0000 |
---|---|---|
committer | scottl <scottl@FreeBSD.org> | 2004-03-16 12:23:43 +0000 |
commit | 5bf26ae88ec5acc7384d978a63caf87263d43f44 (patch) | |
tree | d2e69ac30beafcc279a1b8c63944743fb2f5d608 /sys/dev/raidframe | |
parent | f795311fe87ff55dcaac737f83444253e8e28bfe (diff) | |
download | FreeBSD-src-5bf26ae88ec5acc7384d978a63caf87263d43f44.zip FreeBSD-src-5bf26ae88ec5acc7384d978a63caf87263d43f44.tar.gz |
Remove RAIDFrame. It hasn't worked since GEOM replaced the old disk
mini-layer. I don't have time to bring it forward into the GEOM world, and
no one else has stepped forward to claim it. It'll be in the Attic for safe
keeping for now.
Diffstat (limited to 'sys/dev/raidframe')
134 files changed, 0 insertions, 42360 deletions
diff --git a/sys/dev/raidframe/rf_acctrace.c b/sys/dev/raidframe/rf_acctrace.c deleted file mode 100644 index 91c1b6d4..0000000 --- a/sys/dev/raidframe/rf_acctrace.c +++ /dev/null @@ -1,174 +0,0 @@ -/* $NetBSD: rf_acctrace.c,v 1.4 1999/08/13 03:41:52 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/***************************************************************************** - * - * acctrace.c -- code to support collecting information about each access - * - *****************************************************************************/ - -#if defined(__FreeBSD__) -#include <sys/types.h> -#include <sys/time.h> -#endif -#include <sys/stat.h> -#if defined(__NetBSD__) -#include <sys/types.h> -#endif - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_hist.h> -#include <dev/raidframe/rf_shutdown.h> - -static long numTracesSoFar; -static int accessTraceBufCount = 0; -static RF_AccTraceEntry_t *access_tracebuf; -static long traceCount; - -int rf_stopCollectingTraces; -RF_DECLARE_MUTEX(rf_tracing_mutex) - int rf_trace_fd; - - static void rf_ShutdownAccessTrace(void *); - - static void rf_ShutdownAccessTrace(ignored) - void *ignored; -{ - if (rf_accessTraceBufSize) { - if (accessTraceBufCount) - rf_FlushAccessTraceBuf(); - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); - } - rf_mutex_destroy(&rf_tracing_mutex); -} - -int -rf_ConfigureAccessTrace(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0; - if (rf_accessTraceBufSize) { - RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - accessTraceBufCount = 0; - } - traceCount = 0; - numTracesSoFar = 0; - rc = rf_mutex_init(&rf_tracing_mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - 
__LINE__, rc); - if (rf_accessTraceBufSize) { - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); - rf_mutex_destroy(&rf_tracing_mutex); - } - } - return (rc); -} -/* install a trace record. cause a flush to disk or to the trace collector daemon - * if the trace buffer is at least 1/2 full. - */ -void -rf_LogTraceRec(raid, rec) - RF_Raid_t *raid; - RF_AccTraceEntry_t *rec; -{ - RF_AccTotals_t *acc = &raid->acc_totals; -#if 0 - RF_Etimer_t timer; - int i, n; -#endif - - if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces))) - return; - - /* update AccTotals for this device */ - if (!raid->keep_acc_totals) - return; - acc->num_log_ents++; - if (rec->reconacc) { - acc->recon_start_to_fetch_us += rec->specific.recon.recon_start_to_fetch_us; - acc->recon_fetch_to_return_us += rec->specific.recon.recon_fetch_to_return_us; - acc->recon_return_to_submit_us += rec->specific.recon.recon_return_to_submit_us; - acc->recon_num_phys_ios += rec->num_phys_ios; - acc->recon_phys_io_us += rec->phys_io_us; - acc->recon_diskwait_us += rec->diskwait_us; - acc->recon_reccount++; - } else { - RF_HIST_ADD(acc->tot_hist, rec->total_us); - RF_HIST_ADD(acc->dw_hist, rec->diskwait_us); - /* count of physical ios which are too big. 
often due to - * thermal recalibration */ - /* if bigvals > 0, you should probably ignore this data set */ - if (rec->diskwait_us > 100000) - acc->bigvals++; - acc->total_us += rec->total_us; - acc->suspend_ovhd_us += rec->specific.user.suspend_ovhd_us; - acc->map_us += rec->specific.user.map_us; - acc->lock_us += rec->specific.user.lock_us; - acc->dag_create_us += rec->specific.user.dag_create_us; - acc->dag_retry_us += rec->specific.user.dag_retry_us; - acc->exec_us += rec->specific.user.exec_us; - acc->cleanup_us += rec->specific.user.cleanup_us; - acc->exec_engine_us += rec->specific.user.exec_engine_us; - acc->xor_us += rec->xor_us; - acc->q_us += rec->q_us; - acc->plog_us += rec->plog_us; - acc->diskqueue_us += rec->diskqueue_us; - acc->diskwait_us += rec->diskwait_us; - acc->num_phys_ios += rec->num_phys_ios; - acc->phys_io_us = rec->phys_io_us; - acc->user_reccount++; - } -} - - -/* assumes the tracing mutex is locked at entry. In order to allow this to be called - * from interrupt context, we don't do any copyouts here, but rather just wake trace - * buffer collector thread. - */ -void -rf_FlushAccessTraceBuf() -{ - accessTraceBufCount = 0; -} diff --git a/sys/dev/raidframe/rf_acctrace.h b/sys/dev/raidframe/rf_acctrace.h deleted file mode 100644 index c211514..0000000 --- a/sys/dev/raidframe/rf_acctrace.h +++ /dev/null @@ -1,134 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_acctrace.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * acctrace.h -- header file for acctrace.c - * - *****************************************************************************/ - - -#ifndef _RF__RF_ACCTRACE_H_ -#define _RF__RF_ACCTRACE_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_hist.h> -#include <dev/raidframe/rf_etimer.h> - -typedef struct RF_user_acc_stats_s { - RF_uint64 suspend_ovhd_us; /* us spent mucking in the - * access-suspension code */ - RF_uint64 map_us; /* us spent mapping the access */ - RF_uint64 lock_us; /* us spent locking & unlocking stripes, - * including time spent blocked */ - RF_uint64 dag_create_us;/* us spent creating the DAGs */ - RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not - * broken down into components */ - RF_uint64 exec_us; /* us spent in DispatchDAG */ - RF_uint64 exec_engine_us; /* us spent in engine, not including - * blocking time */ - RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and - * generally cleaning up */ -} RF_user_acc_stats_t; - -typedef struct RF_recon_acc_stats_s { - RF_uint32 recon_start_to_fetch_us; - RF_uint32 recon_fetch_to_return_us; - RF_uint32 recon_return_to_submit_us; -} RF_recon_acc_stats_t; - -typedef struct RF_acctrace_entry_s { - union { - RF_user_acc_stats_t user; - RF_recon_acc_stats_t recon; - } specific; - RF_uint8 reconacc; /* whether 
this is a tracerec for a user acc - * or a recon acc */ - RF_uint64 xor_us; /* us spent doing XORs */ - RF_uint64 q_us; /* us spent doing XORs */ - RF_uint64 plog_us; /* us spent waiting to stuff parity into log */ - RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl - * concurrent ops */ - RF_uint64 diskwait_us; /* _total_ us spent waiting actually waiting - * on the disk, incl concurrent ops */ - RF_uint64 total_us; /* total us spent on this access */ - RF_uint64 num_phys_ios; /* number of physical I/Os invoked */ - RF_uint64 phys_io_us; /* time of physical I/O */ - RF_Etimer_t tot_timer; /* a timer used to compute total access time */ - RF_Etimer_t timer; /* a generic timer val for timing events that - * live across procedure boundaries */ - RF_Etimer_t recon_timer;/* generic timer for recon stuff */ - RF_uint64 index; -} RF_AccTraceEntry_t; - -typedef struct RF_AccTotals_s { - /* user acc stats */ - RF_uint64 suspend_ovhd_us; - RF_uint64 map_us; - RF_uint64 lock_us; - RF_uint64 dag_create_us; - RF_uint64 dag_retry_us; - RF_uint64 exec_us; - RF_uint64 exec_engine_us; - RF_uint64 cleanup_us; - RF_uint64 user_reccount; - /* recon acc stats */ - RF_uint64 recon_start_to_fetch_us; - RF_uint64 recon_fetch_to_return_us; - RF_uint64 recon_return_to_submit_us; - RF_uint64 recon_io_overflow_count; - RF_uint64 recon_phys_io_us; - RF_uint64 recon_num_phys_ios; - RF_uint64 recon_diskwait_us; - RF_uint64 recon_reccount; - /* trace entry stats */ - RF_uint64 xor_us; - RF_uint64 q_us; - RF_uint64 plog_us; - RF_uint64 diskqueue_us; - RF_uint64 diskwait_us; - RF_uint64 total_us; - RF_uint64 num_log_ents; - RF_uint64 phys_io_overflow_count; - RF_uint64 num_phys_ios; - RF_uint64 phys_io_us; - RF_uint64 bigvals; - /* histograms */ - RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS]; - RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS]; -} RF_AccTotals_t; -#if RF_UTILITY == 0 -RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex) -#endif /* RF_UTILITY == 0 */ - - int 
rf_ConfigureAccessTrace(RF_ShutdownList_t ** listp); - void rf_LogTraceRec(RF_Raid_t * raid, RF_AccTraceEntry_t * rec); - void rf_FlushAccessTraceBuf(void); - -#endif /* !_RF__RF_ACCTRACE_H_ */ diff --git a/sys/dev/raidframe/rf_alloclist.c b/sys/dev/raidframe/rf_alloclist.c deleted file mode 100644 index 8c8b837..0000000 --- a/sys/dev/raidframe/rf_alloclist.c +++ /dev/null @@ -1,190 +0,0 @@ -/* $NetBSD: rf_alloclist.c,v 1.4 1999/08/13 03:41:53 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * - * Alloclist.c -- code to manipulate allocation lists - * - * an allocation list is just a list of AllocListElem structures. Each - * such structure contains a fixed-size array of pointers. Calling - * FreeAList() causes each pointer to be freed. 
- * - ***************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_shutdown.h> - -RF_DECLARE_STATIC_MUTEX(alist_mutex) - static unsigned int fl_hit_count, fl_miss_count; - - static RF_AllocListElem_t *al_free_list = NULL; - static int al_free_list_count; - -#define RF_AL_FREELIST_MAX 256 - -#define DO_FREE(_p,_sz) RF_Free((_p),(_sz)) - - static void rf_ShutdownAllocList(void *); - - static void rf_ShutdownAllocList(ignored) - void *ignored; -{ - RF_AllocListElem_t *p, *pt; - - for (p = al_free_list; p;) { - pt = p; - p = p->next; - DO_FREE(pt, sizeof(*pt)); - } - rf_mutex_destroy(&alist_mutex); - /* - printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n", - fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count), - fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count)); - */ -} - -int -rf_ConfigureAllocList(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - rc = rf_mutex_init(&alist_mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - al_free_list = NULL; - fl_hit_count = fl_miss_count = al_free_list_count = 0; - rc = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_mutex_destroy(&alist_mutex); - return (rc); - } - return (0); -} - - -/* we expect the lists to have at most one or two elements, so we're willing - * to search for the end. If you ever observe the lists growing longer, - * increase POINTERS_PER_ALLOC_LIST_ELEMENT. 
- */ -void -rf_real_AddToAllocList(l, p, size, lockflag) - RF_AllocListElem_t *l; - void *p; - int size; - int lockflag; -{ - RF_AllocListElem_t *newelem; - - for (; l->next; l = l->next) - RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* find end of list */ - - RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); - if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) { - newelem = rf_real_MakeAllocList(lockflag); - l->next = newelem; - l = newelem; - } - l->pointers[l->numPointers] = p; - l->sizes[l->numPointers] = size; - l->numPointers++; - -} - - -/* we use the debug_mem_mutex here because we need to lock it anyway to call free. - * this is probably a bug somewhere else in the code, but when I call malloc/free - * outside of any lock I have endless trouble with malloc appearing to return the - * same pointer twice. Since we have to lock it anyway, we might as well use it - * as the lock around the al_free_list. Note that we can't call Free with the - * debug_mem_mutex locked. 
- */ -void -rf_FreeAllocList(l) - RF_AllocListElem_t *l; -{ - int i; - RF_AllocListElem_t *temp, *p; - - for (p = l; p; p = p->next) { - RF_ASSERT(p->numPointers >= 0 && p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); - for (i = 0; i < p->numPointers; i++) { - RF_ASSERT(p->pointers[i]); - RF_Free(p->pointers[i], p->sizes[i]); - } - } - while (l) { - temp = l; - l = l->next; - if (al_free_list_count > RF_AL_FREELIST_MAX) { - DO_FREE(temp, sizeof(*temp)); - } else { - temp->next = al_free_list; - al_free_list = temp; - al_free_list_count++; - } - } -} - -RF_AllocListElem_t * -rf_real_MakeAllocList(lockflag) - int lockflag; -{ - RF_AllocListElem_t *p; - - if (al_free_list) { - fl_hit_count++; - p = al_free_list; - al_free_list = p->next; - al_free_list_count--; - } else { - fl_miss_count++; - RF_Malloc(p, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *)); /* no allocation locking - * in kernel, so this is - * fine */ - } - if (p == NULL) { - return (NULL); - } - bzero((char *) p, sizeof(RF_AllocListElem_t)); - return (p); -} diff --git a/sys/dev/raidframe/rf_alloclist.h b/sys/dev/raidframe/rf_alloclist.h deleted file mode 100644 index c746452..0000000 --- a/sys/dev/raidframe/rf_alloclist.h +++ /dev/null @@ -1,60 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_alloclist.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * - * alloclist.h -- header file for alloclist.c - * - ***************************************************************************/ - -#ifndef _RF__RF_ALLOCLIST_H_ -#define _RF__RF_ALLOCLIST_H_ - -#include <dev/raidframe/rf_types.h> - -#define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20 - -struct RF_AllocListElem_s { - void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int numPointers; - RF_AllocListElem_t *next; -}; -#define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1); -#define rf_AddToAllocList(_l_,_ptr_,_sz_) rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1) - -int rf_ConfigureAllocList(RF_ShutdownList_t ** listp); - -#if RF_UTILITY == 0 -void rf_real_AddToAllocList(RF_AllocListElem_t * l, void *p, int size, int lockflag); -void rf_FreeAllocList(RF_AllocListElem_t * l); -RF_AllocListElem_t *rf_real_MakeAllocList(int lockflag); -#endif /* RF_UTILITY == 0 */ - -#endif /* !_RF__RF_ALLOCLIST_H_ */ diff --git a/sys/dev/raidframe/rf_archs.h b/sys/dev/raidframe/rf_archs.h deleted file mode 100644 index faef157..0000000 --- a/sys/dev/raidframe/rf_archs.h +++ /dev/null @@ -1,75 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_archs.h,v 1.11 2001/01/26 04:43:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_archs.h -- defines for which architectures you want to - * include is some particular build of raidframe. Unfortunately, - * it's difficult to exclude declustering, P+Q, and distributed - * sparing because the code is intermixed with RAID5 code. This - * should be fixed. - * - * this is really intended only for use in the kernel, where I - * am worried about the size of the object module. At user level and - * in the simulator, I don't really care that much, so all the - * architectures can be compiled together. Note that by itself, turning - * off these defines does not affect the size of the executable; you - * have to edit the makefile for that. - * - * comment out any line below to eliminate that architecture. - * the list below includes all the modules that can be compiled - * out. 
- * - */ - -#ifndef _RF__RF_ARCHS_H_ -#define _RF__RF_ARCHS_H_ - -#define RF_INCLUDE_EVENODD 1 - -#define RF_INCLUDE_RAID5_RS 1 -#define RF_INCLUDE_PARITYLOGGING 1 - -#define RF_INCLUDE_CHAINDECLUSTER 1 -#define RF_INCLUDE_INTERDECLUSTER 1 - -#define RF_INCLUDE_PARITY_DECLUSTERING 1 -#define RF_INCLUDE_PARITY_DECLUSTERING_DS 1 - -#define RF_INCLUDE_RAID0 1 -#define RF_INCLUDE_RAID1 1 -#define RF_INCLUDE_RAID4 1 -#define RF_INCLUDE_RAID5 1 -#define RF_INCLUDE_RAID6 0 -#define RF_INCLUDE_DECL_PQ 0 - -#define RF_MEMORY_REDZONES 0 -#define RF_RECON_STATS 1 - -#include <dev/raidframe/rf_options.h> - -#endif /* !_RF__RF_ARCHS_H_ */ diff --git a/sys/dev/raidframe/rf_aselect.c b/sys/dev/raidframe/rf_aselect.c deleted file mode 100644 index 13cdbbe..0000000 --- a/sys/dev/raidframe/rf_aselect.c +++ /dev/null @@ -1,496 +0,0 @@ -/* $NetBSD: rf_aselect.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * aselect.c -- algorithm selection code - * - *****************************************************************************/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_map.h> - -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) -/* the function below is not used... so don't define it! 
*/ -#else -static void TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *); -#endif - -static int InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int); -static void UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *); -int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t); - - -/****************************************************************************** - * - * Create and Initialiaze a dag header and termination node - * - *****************************************************************************/ -static int -InitHdrNode(hdr, raidPtr, memChunkEnable) - RF_DagHeader_t **hdr; - RF_Raid_t *raidPtr; - int memChunkEnable; -{ - /* create and initialize dag hdr */ - *hdr = rf_AllocDAGHeader(); - rf_MakeAllocList((*hdr)->allocList); - if ((*hdr)->allocList == NULL) { - rf_FreeDAGHeader(*hdr); - return (ENOMEM); - } - (*hdr)->status = rf_enable; - (*hdr)->numSuccedents = 0; - (*hdr)->raidPtr = raidPtr; - (*hdr)->next = NULL; - return (0); -} -/****************************************************************************** - * - * Transfer allocation list and mem chunks from one dag to another - * - *****************************************************************************/ -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) -/* the function below is not used... so don't define it! 
*/ -#else -static void -TransferDagMemory(daga, dagb) - RF_DagHeader_t *daga; - RF_DagHeader_t *dagb; -{ - RF_AccessStripeMapHeader_t *end; - RF_AllocListElem_t *p; - int i, memChunksXfrd = 0, xtraChunksXfrd = 0; - - /* transfer allocList from dagb to daga */ - for (p = dagb->allocList; p; p = p->next) { - for (i = 0; i < p->numPointers; i++) { - rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]); - p->pointers[i] = NULL; - p->sizes[i] = 0; - } - p->numPointers = 0; - } - - /* transfer chunks from dagb to daga */ - while ((memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) && (daga->chunkIndex < RF_MAXCHUNKS)) { - /* stuff chunks into daga's memChunk array */ - if (memChunksXfrd < dagb->chunkIndex) { - daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd]; - dagb->memChunk[memChunksXfrd++] = NULL; - } else { - daga->memChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; - dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; - } - } - /* use escape hatch to hold excess chunks */ - while (memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) { - if (memChunksXfrd < dagb->chunkIndex) { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd]; - dagb->memChunk[memChunksXfrd++] = NULL; - } else { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; - dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; - } - } - RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex)); - RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS); - RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt); - dagb->chunkIndex = 0; - dagb->xtraChunkIndex = 0; - - /* transfer asmList from dagb to daga */ - if (dagb->asmList) { - if (daga->asmList) { - end = daga->asmList; - while (end->next) - end = end->next; - end->next = dagb->asmList; - } else - daga->asmList = dagb->asmList; - dagb->asmList = NULL; - } -} -#endif /* __NetBSD__ */ - 
-/***************************************************************************************** - * - * Ensure that all node->dagHdr fields in a dag are consistent - * - * IMPORTANT: This routine recursively searches all succedents of the node. If a - * succedent is encountered whose dagHdr ptr does not require adjusting, that node's - * succedents WILL NOT BE EXAMINED. - * - ****************************************************************************************/ -static void -UpdateNodeHdrPtr(hdr, node) - RF_DagHeader_t *hdr; - RF_DagNode_t *node; -{ - int i; - RF_ASSERT(hdr != NULL && node != NULL); - for (i = 0; i < node->numSuccedents; i++) - if (node->succedents[i]->dagHdr != hdr) - UpdateNodeHdrPtr(hdr, node->succedents[i]); - node->dagHdr = hdr; -} -/****************************************************************************** - * - * Create a DAG to do a read or write operation. - * - * create an array of dagLists, one list per parity stripe. - * return the lists in the array desc->dagArray. - * - * Normally, each list contains one dag for the entire stripe. In some - * tricky cases, we break this into multiple dags, either one per stripe - * unit or one per block (sector). When this occurs, these dags are returned - * as a linked list (dagList) which is executed sequentially (to preserve - * atomic parity updates in the stripe). - * - * dags which operate on independent parity goups (stripes) are returned in - * independent dagLists (distinct elements in desc->dagArray) and may be - * executed concurrently. - * - * Finally, if the SelectionFunc fails to create a dag for a block, we punt - * and return 1. - * - * The above process is performed in two phases: - * 1) create an array(s) of creation functions (eg stripeFuncs) - * 2) create dags and concatenate/merge to form the final dag. 
- * - * Because dag's are basic blocks (single entry, single exit, unconditional - * control flow, we can add the following optimizations (future work): - * first-pass optimizer to allow max concurrency (need all data dependencies) - * second-pass optimizer to eliminate common subexpressions (need true - * data dependencies) - * third-pass optimizer to eliminate dead code (need true data dependencies) - *****************************************************************************/ - -#define MAXNSTRIPES 5 - -int -rf_SelectAlgorithm(desc, flags) - RF_RaidAccessDesc_t *desc; - RF_RaidAccessFlags_t flags; -{ - RF_AccessStripeMapHeader_t *asm_h = desc->asmap; - RF_IoType_t type = desc->type; - RF_Raid_t *raidPtr = desc->raidPtr; - void *bp = desc->bp; - - RF_AccessStripeMap_t *asmap = asm_h->stripeMap; - RF_AccessStripeMap_t *asm_p; - RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h; - int i, j, k; - RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES]; - RF_AccessStripeMap_t *asm_up, *asm_bp; - RF_AccessStripeMapHeader_t ***asmh_u, *endASMList; - RF_AccessStripeMapHeader_t ***asmh_b; - RF_VoidFuncPtr **stripeUnitFuncs, uFunc; - RF_VoidFuncPtr **blockFuncs, bFunc; - int numStripesBailed = 0, cantCreateDAGs = RF_FALSE; - int numStripeUnitsBailed = 0; - int stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0; - RF_StripeNum_t numStripeUnits; - RF_SectorNum_t numBlocks; - RF_RaidAddr_t address; - int length; - RF_PhysDiskAddr_t *physPtr; - caddr_t buffer; - - lastdag_h = NULL; - asmh_u = asmh_b = NULL; - stripeUnitFuncs = NULL; - blockFuncs = NULL; - - /* get an array of dag-function creation pointers, try to avoid - * calling malloc */ - if (asm_h->numStripes <= MAXNSTRIPES) - stripeFuncs = normalStripeFuncs; - else - RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - - /* walk through the asm list once collecting information */ - /* attempt to find a single creation function for each stripe */ - desc->numStripes = 
0; - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { - desc->numStripes++; - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_p, &stripeFuncs[i]); - /* check to see if we found a creation func for this stripe */ - if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { - /* could not find creation function for entire stripe - * so, let's see if we can find one for each stripe - * unit in the stripe */ - - if (numStripesBailed == 0) { - /* one stripe map header for each stripe we - * bail on */ - RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to arrays of - * stripeFuncs */ - RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); - } - /* create an array of creation funcs (called - * stripeFuncs) for this stripe */ - numStripeUnits = asm_p->numStripeUnitsAccessed; - RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of stripeUnitFuncs for this stripe */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - /* remap for series of single stripe-unit - * accesses */ - address = physPtr->raidAddress; - length = physPtr->numSector; - buffer = physPtr->bufPtr; - - asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); - asm_up = asmh_u[numStripesBailed][j]->stripeMap; - - /* get the creation func for this stripe unit */ - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j])); - - /* check to see if we found a creation func - * for this stripe unit */ - if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL) { - /* could not find creation function - * for stripe unit so, let's see if we - * can find one for 
each block in the - * stripe unit */ - if (numStripeUnitsBailed == 0) { - /* one stripe map header for - * each stripe unit we bail on */ - RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to - * arrays of blockFuncs */ - RF_Calloc(blockFuncs, asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); - } - /* create an array of creation funcs - * (called blockFuncs) for this stripe - * unit */ - numBlocks = physPtr->numSector; - numBlockDags += numBlocks; - RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of blockFuncs for this - * stripe unit */ - for (k = 0; k < numBlocks; k++) { - /* remap for series of single - * stripe-unit accesses */ - address = physPtr->raidAddress + k; - length = 1; - buffer = physPtr->bufPtr + (k * (1 << raidPtr->logBytesPerSector)); - - asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); - asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap; - - /* get the creation func for - * this stripe unit */ - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k])); - - /* check to see if we found a - * creation func for this - * stripe unit */ - if (blockFuncs[numStripeUnitsBailed][k] == NULL) - cantCreateDAGs = RF_TRUE; - } - numStripeUnitsBailed++; - } else { - numUnitDags++; - } - } - RF_ASSERT(j == numStripeUnits); - numStripesBailed++; - } - } - - if (cantCreateDAGs) { - /* free memory and punt */ - if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - if (numStripesBailed > 0) { - stripeNum = 0; - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, 
i++) - if (stripeFuncs[i] == NULL) { - numStripeUnits = asm_p->numStripeUnitsAccessed; - for (j = 0; j < numStripeUnits; j++) - rf_FreeAccessStripeMap(asmh_u[stripeNum][j]); - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); - stripeNum++; - } - RF_ASSERT(stripeNum == numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - } - return (1); - } else { - /* begin dag creation */ - stripeNum = 0; - stripeUnitNum = 0; - - /* create an array of dagLists and fill them in */ - RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList); - - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { - /* grab dag header for this stripe */ - dag_h = NULL; - desc->dagArray[i].desc = desc; - - if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { - /* use bailout functions for this stripe */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - uFunc = stripeUnitFuncs[stripeNum][j]; - if (uFunc == (RF_VoidFuncPtr) NULL) { - /* use bailout functions for - * this stripe unit */ - for (k = 0; k < physPtr->numSector; k++) { - /* create a dag for - * this block */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } else { - lastdag_h->next = tempdag_h; - } - lastdag_h = tempdag_h; - - bFunc = blockFuncs[stripeUnitNum][k]; - RF_ASSERT(bFunc); - asm_bp = asmh_b[stripeUnitNum][k]->stripeMap; - (*bFunc) (raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList); - } - stripeUnitNum++; - } else { - /* create a dag for this unit */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } else { - lastdag_h->next = tempdag_h; - } - 
lastdag_h = tempdag_h; - - asm_up = asmh_u[stripeNum][j]->stripeMap; - (*uFunc) (raidPtr, asm_up, tempdag_h, bp, flags, tempdag_h->allocList); - } - } - RF_ASSERT(j == asm_p->numStripeUnitsAccessed); - /* merge linked bailout dag to existing dag - * collection */ - stripeNum++; - } else { - /* Create a dag for this parity stripe */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } else { - lastdag_h->next = tempdag_h; - } - lastdag_h = tempdag_h; - - (stripeFuncs[i]) (raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList); - } - desc->dagArray[i].dags = dag_h; - } - RF_ASSERT(i == desc->numStripes); - - /* free memory */ - if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0)) { - stripeNum = 0; - stripeUnitNum = 0; - if (dag_h->asmList) { - endASMList = dag_h->asmList; - while (endASMList->next) - endASMList = endASMList->next; - } else - endASMList = NULL; - /* walk through io, stripe by stripe */ - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) - if (stripeFuncs[i] == NULL) { - numStripeUnits = asm_p->numStripeUnitsAccessed; - /* walk through stripe, stripe unit by - * stripe unit */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - if (stripeUnitFuncs[stripeNum][j] == NULL) { - numBlocks = physPtr->numSector; - /* walk through stripe - * unit, block by - * block */ - for (k = 0; k < numBlocks; k++) - if (dag_h->asmList == NULL) { - dag_h->asmList = asmh_b[stripeUnitNum][k]; - endASMList = dag_h->asmList; - } else { - endASMList->next = asmh_b[stripeUnitNum][k]; - endASMList = endASMList->next; - } - RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr)); - stripeUnitNum++; - } - if (dag_h->asmList == NULL) { - 
dag_h->asmList = asmh_u[stripeNum][j]; - endASMList = dag_h->asmList; - } else { - endASMList->next = asmh_u[stripeNum][j]; - endASMList = endASMList->next; - } - } - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); - stripeNum++; - } - RF_ASSERT(stripeNum == numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - if (numStripeUnitsBailed > 0) { - RF_ASSERT(stripeUnitNum == numStripeUnitsBailed); - RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - } - } - return (0); - } -} diff --git a/sys/dev/raidframe/rf_aselect.h b/sys/dev/raidframe/rf_aselect.h deleted file mode 100644 index de9cd76..0000000 --- a/sys/dev/raidframe/rf_aselect.h +++ /dev/null @@ -1,43 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_aselect.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * aselect.h -- header file for algorithm selection code - * - *****************************************************************************/ - -#ifndef _RF__RF_ASELECT_H_ -#define _RF__RF_ASELECT_H_ - -#include <dev/raidframe/rf_desc.h> - -int rf_SelectAlgorithm(RF_RaidAccessDesc_t * desc, RF_RaidAccessFlags_t flags); - -#endif /* !_RF__RF_ASELECT_H_ */ diff --git a/sys/dev/raidframe/rf_bsd.h b/sys/dev/raidframe/rf_bsd.h deleted file mode 100644 index 14c10f5..0000000 --- a/sys/dev/raidframe/rf_bsd.h +++ /dev/null @@ -1,152 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_netbsd.h,v 1.12 2000/05/28 22:53:49 oster Exp $ */ - -/*- - * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster; Jason R. Thorpe. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */

#ifndef _RF__RF_BSD_H_
#define _RF__RF_BSD_H_

#ifdef _KERNEL
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include "opt_raid.h"

/*
 * rf_printf(lvl, fmt, ...) -- debug output filtered by verbosity level.
 *
 * With RAID_DEBUG defined the message is printed when lvl <= RAID_DEBUG;
 * without it the call expands to nothing.  Both forms are a single
 * statement that requires the trailing semicolon, so the macro is safe as
 * the unbraced body of an if/else.
 */
#ifdef RAID_DEBUG
#define rf_printf(lvl, fmt, args...) \
	do { \
		if ((lvl) <= RAID_DEBUG) \
			printf(fmt, ##args); \
	} while (0)

#else /* !RAID_DEBUG */
#define rf_printf(lvl, fmt, args...) \
	do { \
	} while (0)
#endif /* RAID_DEBUG */
#endif /* _KERNEL */

/* The per-component label information that the user can set */
typedef struct RF_ComponentInfo_s {
	int row;		/* the row number of this component */
	int column;		/* the column number of this component */
	int serial_number;	/* a user-specified serial number for this
				 * RAID set */
} RF_ComponentInfo_t;

/* The per-component label information */
typedef struct RF_ComponentLabel_s {
	int version;		/* The version of this label. */
	int serial_number;	/* a user-specified serial number for this
				 * RAID set */
	int mod_counter;	/* modification counter.  Changed (usually
				 * by incrementing) every time the label
				 * is changed */
	int row;		/* the row number of this component */
	int column;		/* the column number of this component */
	int num_rows;		/* number of rows in this RAID set */
	int num_columns;	/* number of columns in this RAID set */
	int clean;		/* 1 when clean, 0 when dirty */
	int status;		/* rf_ds_optimal, rf_ds_dist_spared, whatever. */
	/* stuff that will be in version 2 of the label */
	int sectPerSU;		/* Sectors per Stripe Unit */
	int SUsPerPU;		/* Stripe Units per Parity Unit */
	int SUsPerRU;		/* Stripe Units per Reconstruction Unit */
	int parityConfig;	/* '0' == RAID0, '1' == RAID1, etc. */
	int maxOutstanding;	/* maxOutstanding disk requests */
	int blockSize;		/* size of component block.
				 * (disklabel->d_secsize) */
	int numBlocks;		/* number of blocks on this component.  May
				 * be smaller than the partition size. */
	int partitionSize;	/* number of blocks on this *partition*.
				 * Must exactly match the partition size
				 * from the disklabel. */
	int future_use[33];	/* Future expansion */
	int autoconfigure;	/* automatically configure this RAID set.
				 * 0 == no, 1 == yes */
	int root_partition;	/* Use this set as /
				 * 0 == no, 1 == yes */
	int last_unit;		/* last unit number (e.g. 0 for /dev/raid0)
				 * of this component.  Used for autoconfigure
				 * only. */
	int config_order;	/* 0 .. n.  The order in which the component
				 * should be auto-configured.  E.g. 0 will be
				 * done first (and would become raid0).
				 * This may be in conflict with last_unit!!?! */
				/* Not currently used. */
	int future_use2[44];	/* More future expansion */
} RF_ComponentLabel_t;

/* Names one component by its position in the array. */
typedef struct RF_SingleComponent_s {
	int row;
	int column;
	char component_name[50];	/* name of the component */
} RF_SingleComponent_t;

#ifdef _KERNEL

struct raidcinfo {
	struct vnode *ci_vp;		/* component device's vnode */
	dev_t ci_dev;			/* component device's dev_t */
	RF_ComponentLabel_t ci_label;	/* component's RAIDframe label */
#if 0
	size_t ci_size;			/* size */
	char *ci_path;			/* path to component */
	size_t ci_pathlen;		/* length of component path */
#endif
};



/* XXX probably belongs in a different .h file. */
typedef struct RF_AutoConfig_s {
	char devname[56];		/* the name of this component */
	int flag;			/* a general-purpose flag */
	dev_t dev;			/* the device for this component */
	struct vnode *vp;		/* Mr. Vnode Pointer */
	RF_ComponentLabel_t *clabel;	/* the label */
	struct RF_AutoConfig_s *next;	/* the next autoconfig structure
					 * in this set. */
} RF_AutoConfig_t;

typedef struct RF_ConfigSet_s {
	struct RF_AutoConfig_s *ac;	/* all of the autoconfig structures for
					 * this config set. */
	int rootable;			/* Set to 1 if this set can be root */
	struct RF_ConfigSet_s *next;
} RF_ConfigSet_t;

#endif /* _KERNEL */
#endif /* _RF__RF_BSD_H_ */
diff --git a/sys/dev/raidframe/rf_callback.c b/sys/dev/raidframe/rf_callback.c deleted file mode 100644 index 1739fc6..0000000 --- a/sys/dev/raidframe/rf_callback.c +++ /dev/null @@ -1,96 +0,0 @@ -/* $NetBSD: rf_callback.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved.
- * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * callback.c -- code to manipulate callback descriptor - * - ****************************************************************************************/ - - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_callback.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_shutdown.h> - -static RF_FreeList_t *rf_callback_freelist; - -#define RF_MAX_FREE_CALLBACK 64 -#define RF_CALLBACK_INC 4 -#define RF_CALLBACK_INITIAL 4 - -static void rf_ShutdownCallback(void *); -static void -rf_ShutdownCallback(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_callback_freelist, next, (RF_CallbackDesc_t *)); -} - -int -rf_ConfigureCallback(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_callback_freelist, RF_MAX_FREE_CALLBACK, - RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t)); - if (rf_callback_freelist == 
NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownCallback, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownCallback(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_callback_freelist, RF_CALLBACK_INITIAL, next, - (RF_CallbackDesc_t *)); - return (0); -} - -RF_CallbackDesc_t * -rf_AllocCallbackDesc() -{ - RF_CallbackDesc_t *p; - - RF_FREELIST_GET(rf_callback_freelist, p, next, (RF_CallbackDesc_t *)); - return (p); -} - -void -rf_FreeCallbackDesc(p) - RF_CallbackDesc_t *p; -{ - RF_FREELIST_FREE(rf_callback_freelist, p, next); -} diff --git a/sys/dev/raidframe/rf_callback.h b/sys/dev/raidframe/rf_callback.h deleted file mode 100644 index feda31d..0000000 --- a/sys/dev/raidframe/rf_callback.h +++ /dev/null @@ -1,65 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_callback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/***************************************************************************************** - * - * callback.h -- header file for callback.c - * - * the reconstruction code must manage concurrent I/Os on multiple drives. - * it sometimes needs to suspend operation on a particular drive until some - * condition occurs. we can't block the thread, of course, or we wouldn't - * be able to manage our other outstanding I/Os. Instead we just suspend - * new activity on the indicated disk, and create a callback descriptor and - * put it someplace where it will get invoked when the condition that's - * stalling us has cleared. When the descriptor is invoked, it will call - * a function that will restart operation on the indicated disk. - * - ****************************************************************************************/ - -#ifndef _RF__RF_CALLBACK_H_ -#define _RF__RF_CALLBACK_H_ - -#include <dev/raidframe/rf_types.h> - -struct RF_CallbackDesc_s { - void (*callbackFunc) (RF_CBParam_t); /* function to call */ - RF_CBParam_t callbackArg; /* args to give to function, or just - * info about this callback */ - RF_CBParam_t callbackArg2; - RF_RowCol_t row; /* disk row and column IDs to give to the - * callback func */ - RF_RowCol_t col; - RF_CallbackDesc_t *next;/* next entry in list */ -}; - -int rf_ConfigureCallback(RF_ShutdownList_t ** listp); -RF_CallbackDesc_t *rf_AllocCallbackDesc(void); -void rf_FreeCallbackDesc(RF_CallbackDesc_t * p); - -#endif /* !_RF__RF_CALLBACK_H_ */ diff --git a/sys/dev/raidframe/rf_chaindecluster.c b/sys/dev/raidframe/rf_chaindecluster.c deleted file mode 100644 index 68951a0..0000000 --- a/sys/dev/raidframe/rf_chaindecluster.c +++ /dev/null @@ -1,292 +0,0 @@ -/* $NetBSD: rf_chaindecluster.c,v 1.6 2001/01/26 04:27:16 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * - * rf_chaindecluster.c -- implements chained declustering - * - *****************************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if (RF_INCLUDE_CHAINDECLUSTER > 0) - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_chaindecluster.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> - -typedef struct RF_ChaindeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_ChaindeclusterConfigInfo_t; - 
-int -rf_ConfigureChainDecluster( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t num_used_stripeUnitsPerDisk; - RF_ChaindeclusterConfigInfo_t *info; - RF_RowCol_t i; - - /* create a Chained Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* fill in the config structure. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < raidPtr->numCol; i++) { - info->stripeIdentifier[i][0] = i % raidPtr->numCol; - info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol; - } - - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2 * raidPtr->numCol - 2)); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2); - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - - layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; - - raidPtr->sectorsPerDisk = - num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = - (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / 
layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsChainDecluster(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - /* - * The layout uses two stripe units per disk as spare within each - * sparing region. - */ - return (2 * info->numSparingRegions); -} - - -/* Maps to the primary copy of the data, i.e. the first mirror pair */ -void -rf_MapSectorChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_SectorNum_t index_within_region, index_within_disk; - RF_StripeNum_t sparing_region_id; - int col_before_remap; - - *row = 0; - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - index_within_disk = index_within_region / raidPtr->numCol; - col_before_remap = SUID % raidPtr->numCol; - - if (!remap) { - *col = col_before_remap; - *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap sector to spare space... 
*/ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - index_within_disk = index_within_region / raidPtr->numCol; - if (index_within_disk < col_before_remap) - *col = index_within_disk; - else - if (index_within_disk == raidPtr->numCol - 2) { - *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol; - *diskSector += raidPtr->Layout.sectorsPerStripeUnit; - } else - *col = (index_within_disk + 2) % raidPtr->numCol; - } - -} - - - -/* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained - in the next disk (mod numCol) after the disk containing the primary copy. - The offset into the disk is one-half disk down */ -void -rf_MapParityChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_SectorNum_t index_within_region, index_within_disk; - RF_StripeNum_t sparing_region_id; - int col_before_remap; - - *row = 0; - if (!remap) { - *col = SUID % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap parity to spare space ... 
*/ - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - index_within_disk = index_within_region / raidPtr->numCol; - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - col_before_remap = SUID % raidPtr->numCol; - if (index_within_disk < col_before_remap) - *col = index_within_disk; - else - if (index_within_disk == raidPtr->numCol - 2) { - *col = (col_before_remap + 2) % raidPtr->numCol; - *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; - } else - *col = (index_within_disk + 2) % raidPtr->numCol; - } - -} - -void -rf_IdentifyStripeChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID; - RF_RowCol_t col; - - SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; - col = SUID % raidPtr->numCol; - *outRow = 0; - *diskids = info->stripeIdentifier[col]; -} - -void -rf_MapSIDToPSIDChainDecluster( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -/****************************************************************************** - * select a graph to perform a single-stripe access - * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - function to use to create the graph (return value) - *****************************************************************************/ - -void -rf_RAIDCDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - 
RF_VoidFuncPtr * createFunc) -#if 0 - void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, - RF_AllocListElem_t *) -#endif -{ - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - RF_ASSERT(raidPtr->numRow == 1); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; - - if (type == RF_IO_TYPE_READ) { - if ((raidPtr->status[0] == rf_rs_degraded) || (raidPtr->status[0] == rf_rs_reconstructing)) - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is - * degraded, implement - * workload shifting */ - else - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not - * degraded, so use - * mirror partition dag */ - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; -} -#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ diff --git a/sys/dev/raidframe/rf_chaindecluster.h b/sys/dev/raidframe/rf_chaindecluster.h deleted file mode 100644 index 6030289..0000000 --- a/sys/dev/raidframe/rf_chaindecluster.h +++ /dev/null @@ -1,68 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_chaindecluster.h,v 1.4 2001/01/26 04:14:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_chaindecluster.h - * header file for Chained Declustering - */ - - -#ifndef _RF__RF_CHAINDECLUSTER_H_ -#define _RF__RF_CHAINDECLUSTER_H_ - -int -rf_ConfigureChainDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t * raidPtr); -void -rf_MapSectorChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAIDCDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr *); -#if 0 -void (**createFunc) (RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, - void *, - RF_RaidAccessFlags_t, - RF_AllocListElem_t *); -#endif - -#endif /* !_RF__RF_CHAINDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_configure.h b/sys/dev/raidframe/rf_configure.h deleted file mode 100644 index c51b8a3..0000000 --- a/sys/dev/raidframe/rf_configure.h +++ /dev/null @@ -1,99 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_configure.h,v 1.4 1999/03/02 
03:18:49 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************** - * - * rf_configure.h - * - * header file for raidframe configuration in the kernel version only. - * configuration is invoked via ioctl rather than at boot time - * - *******************************/ - - -#ifndef _RF__RF_CONFIGURE_H_ -#define _RF__RF_CONFIGURE_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> - -#include <sys/param.h> -#include <sys/proc.h> - -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif - -/* the raidframe configuration, passed down through an ioctl. - * the driver can be reconfigured (with total loss of data) at any time, - * but it must be shut down first. 
- */ -struct RF_Config_s { - RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns, - * and spare disks */ - dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks - * comprising array */ - char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names */ - dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare - * disks */ - char spare_names[RF_MAXSPARE][50]; /* device names */ - RF_SectorNum_t sectPerSU; /* sectors per stripe unit */ - RF_StripeNum_t SUsPerPU;/* stripe units per parity unit */ - RF_StripeNum_t SUsPerRU;/* stripe units per reconstruction unit */ - RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to - * be used */ - RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan, - * not used in kernel */ - char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a - * disk. not used in kernel. */ - char debugVars[RF_MAXDBGV][50]; /* space for specifying debug - * variables & their values */ - unsigned int layoutSpecificSize; /* size in bytes of - * layout-specific info */ - void *layoutSpecific; /* a pointer to a layout-specific structure to - * be copied in */ - int force; /* if !0, ignore many fatal - configuration conditions */ - /* - "force" is used to override cases where the component labels would - indicate that configuration should not proceed without user - intervention - */ -}; -#ifndef _KERNEL -int rf_MakeConfig(char *configname, RF_Config_t * cfgPtr); -int rf_MakeLayoutSpecificNULL(FILE * fp, RF_Config_t * cfgPtr, void *arg); -int rf_MakeLayoutSpecificDeclustered(FILE * configfp, RF_Config_t * cfgPtr, void *arg); -void *rf_ReadSpareTable(RF_SparetWait_t * req, char *fname); -#endif /* !_KERNEL */ - -#endif /* !_RF__RF_CONFIGURE_H_ */ diff --git a/sys/dev/raidframe/rf_copyback.c b/sys/dev/raidframe/rf_copyback.c deleted file mode 100644 index eb16404..0000000 --- a/sys/dev/raidframe/rf_copyback.c +++ /dev/null @@ -1,433 +0,0 @@ -/* $NetBSD: rf_copyback.c,v 1.15 2001/01/26 02:16:24 oster Exp $ */ - 
-#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * copyback.c -- code to copy reconstructed data back from spare space to - * the replaced disk. - * - * the code operates using callbacks on the I/Os to continue with the next - * unit to be copied back. We do this because a simple loop containing blocking I/Os - * will not work in the simulator. 
- * - ****************************************************************************************/ - -#include <dev/raidframe/rf_types.h> - -#if defined(__FreeBSD__) -#include <sys/types.h> -#include <sys/systm.h> -#if __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#endif - -#include <sys/time.h> -#include <sys/buf.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_copyback.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -#define RF_COPYBACK_DATA 0 -#define RF_COPYBACK_PARITY 1 - -int rf_copyback_in_progress; - -static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status); -static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status); -static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ, - RF_RaidAddr_t addr, RF_RowCol_t testRow, - RF_RowCol_t testCol, - RF_SectorNum_t testOffs); -static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status); - -int -rf_ConfigureCopyback(listp) - RF_ShutdownList_t **listp; -{ - rf_copyback_in_progress = 0; - return (0); -} -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - -/* do a complete copyback */ -void -rf_CopybackReconstructedData(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ComponentLabel_t *c_label; - int done, retcode; - RF_CopybackDesc_t *desc; - RF_RowCol_t frow, fcol; - RF_RaidDisk_t *badDisk; - struct vnode *vp; - char *databuf; - int ac; - - RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *)); - if (c_label == 
NULL) { - printf("rf_CopybackReconstructedData: Out of memory?\n"); - return; - } - - done = 0; - fcol = 0; - for (frow = 0; frow < raidPtr->numRow; frow++) { - for (fcol = 0; fcol < raidPtr->numCol; fcol++) { - if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared - || raidPtr->Disks[frow][fcol].status == rf_ds_spared) { - done = 1; - break; - } - } - if (done) - break; - } - - if (frow == raidPtr->numRow) { - printf("COPYBACK: no disks need copyback\n"); - return; - } - badDisk = &raidPtr->Disks[frow][fcol]; - - /* This device may have been opened successfully the first time. Close - * it before trying to open it again.. */ - - if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { - printf("Closed the open device: %s\n", - raidPtr->Disks[frow][fcol].devname); - vp = raidPtr->raid_cinfo[frow][fcol].ci_vp; - ac = raidPtr->Disks[frow][fcol].auto_configured; - rf_close_component(raidPtr, vp, ac); - raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL; - - } - /* note that this disk was *not* auto_configured (any longer) */ - raidPtr->Disks[frow][fcol].auto_configured = 0; - - printf("About to (re-)open the device: %s\n", - raidPtr->Disks[frow][fcol].devname); - - retcode = raid_getcomponentsize(raidPtr, frow, fcol); - - if (retcode) { - printf("COPYBACK: raidlookup on device: %s failed: %d!\n", - raidPtr->Disks[frow][fcol].devname, retcode); - - /* XXX the component isn't responding properly... 
must be - * still dead :-( */ - return; - - } -#if 0 - /* This is the way it was done before the CAM stuff was removed */ - - if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) { - printf("COPYBACK: unable to extract bus, target, lun from devname %s\n", - badDisk->devname); - return; - } - /* TUR the disk that's marked as bad to be sure that it's actually - * alive */ - rf_SCSI_AllocTUR(&tur_op); - retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev); - rf_SCSI_FreeDiskOp(tur_op, 0); -#endif - - if (retcode) { - printf("COPYBACK: target disk failed TUR\n"); - return; - } - /* get a buffer to hold one SU */ - RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *)); - - /* create a descriptor */ - RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *)); - desc->raidPtr = raidPtr; - desc->status = 0; - desc->frow = frow; - desc->fcol = fcol; - desc->spRow = badDisk->spareRow; - desc->spCol = badDisk->spareCol; - desc->stripeAddr = 0; - desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol; - desc->databuf = databuf; - desc->mcpair = rf_AllocMCPair(); - - printf("COPYBACK: Quiescing the array\n"); - /* quiesce the array, since we don't want to code support for user - * accs here */ - rf_SuspendNewRequestsAndWait(raidPtr); - - /* adjust state of the array and of the disks */ - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal; - raidPtr->status[desc->frow] = rf_rs_optimal; - rf_copyback_in_progress = 1; /* debug only */ - RF_UNLOCK_MUTEX(raidPtr->mutex); - - printf("COPYBACK: Beginning\n"); - RF_GETTIME(desc->starttime); - rf_ContinueCopyback(desc); - - /* Data has been restored. Fix up the component label. */ - /* Don't actually need the read here.. 
*/ - raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, - raidPtr->raid_cinfo[frow][fcol].ci_vp, - c_label); - - raid_init_component_label( raidPtr, c_label ); - - c_label->row = frow; - c_label->column = fcol; - c_label->partitionSize = raidPtr->Disks[frow][fcol].partitionSize; - - raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, - raidPtr->raid_cinfo[frow][fcol].ci_vp, - c_label); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); -} - - -/* - * invoked via callback after a copyback I/O has completed to - * continue on with the next one - */ -void -rf_ContinueCopyback(desc) - RF_CopybackDesc_t *desc; -{ - RF_SectorNum_t testOffs, stripeAddr; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_RaidAddr_t addr; - RF_RowCol_t testRow, testCol; - int old_pctg, new_pctg, done; - struct timeval t, diff; - - old_pctg = (-1); - while (1) { - stripeAddr = desc->stripeAddr; - desc->raidPtr->copyback_stripes_done = stripeAddr - / desc->sectPerStripe; - if (rf_prReconSched) { - old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; - } - desc->stripeAddr += desc->sectPerStripe; - if (rf_prReconSched) { - new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; - if (new_pctg != old_pctg) { - RF_GETTIME(t); - RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); - } - } - if (stripeAddr >= raidPtr->totalSectors) { - rf_CopybackComplete(desc, 0); - return; - } - /* walk through the current stripe, su-by-su */ - for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) { - - /* map the SU, disallowing remap to spare space */ - (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); - - if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs); - done = 1; - break; - } - } - - if (!done) { - /* we didn't find the failed disk in the 
data part. - * check parity. */ - - /* map the parity for this stripe, disallowing remap - * to spare space */ - (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); - - if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs); - } - } - /* check to see if the last read/write pair failed */ - if (desc->status) { - rf_CopybackComplete(desc, 1); - return; - } - /* we didn't find any units to copy back in this stripe. - * Continue with the next one */ - } -} - - -/* copyback one unit */ -static void -rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) - RF_CopybackDesc_t *desc; - int typ; - RF_RaidAddr_t addr; - RF_RowCol_t testRow; - RF_RowCol_t testCol; - RF_SectorNum_t testOffs; -{ - RF_SectorCount_t sectPerSU = desc->sectPerSU; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_RowCol_t spRow = desc->spRow; - RF_RowCol_t spCol = desc->spCol; - RF_SectorNum_t spOffs; - - /* find the spare spare location for this SU */ - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - if (typ == RF_COPYBACK_DATA) - raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); - else - raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); - } else { - spOffs = testOffs; - } - - /* create reqs to read the old location & write the new */ - desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*) (void *, int)) rf_CopybackReadDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); - desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); - desc->frow = testRow; - desc->fcol = testCol; - - /* enqueue the read. 
the write will go out as part of the callback on - * the read. at user-level & in the kernel, wait for the read-write - * pair to complete. in the simulator, just return, since everything - * will happen as callbacks */ - - RF_LOCK_MUTEX(desc->mcpair->mutex); - desc->mcpair->flag = 0; - - rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY); - - while (!desc->mcpair->flag) { - RF_WAIT_MCPAIR(desc->mcpair); - } - RF_UNLOCK_MUTEX(desc->mcpair->mutex); - rf_FreeDiskQueueData(desc->readreq); - rf_FreeDiskQueueData(desc->writereq); - -} - - -/* called at interrupt context when the read has completed. just send out the write */ -static int -rf_CopybackReadDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; -{ - if (status) { /* invoke the callback with bad status */ - printf("COPYBACK: copyback read failed. Aborting.\n"); - (desc->writereq->CompleteFunc) (desc, -100); - } else { - rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY); - } - return (0); -} -/* called at interrupt context when the write has completed. - * at user level & in the kernel, wake up the copyback thread. - * in the simulator, invoke the next copyback directly. - * can't free diskqueuedata structs in the kernel b/c we're at interrupt context. - */ -static int -rf_CopybackWriteDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; -{ - if (status && status != -100) { - printf("COPYBACK: copyback write failed. 
Aborting.\n"); - } - desc->status = status; - rf_MCPairWakeupFunc(desc->mcpair); - return (0); -} -/* invoked when the copyback has completed */ -static void -rf_CopybackComplete(desc, status) - RF_CopybackDesc_t *desc; - int status; -{ - RF_Raid_t *raidPtr = desc->raidPtr; - struct timeval t, diff; - - if (!status) { - RF_LOCK_MUTEX(raidPtr->mutex); - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D'); - rf_FreeSpareTable(raidPtr); - } else { - raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare; - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - - RF_GETTIME(t); - RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("Copyback time was %d.%06d seconds\n", - (int) diff.tv_sec, (int) diff.tv_usec); - } else - printf("COPYBACK: Failure.\n"); - - RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU)); - rf_FreeMCPair(desc->mcpair); - RF_Free(desc, sizeof(*desc)); - - rf_copyback_in_progress = 0; - rf_ResumeNewRequests(raidPtr); -} diff --git a/sys/dev/raidframe/rf_copyback.h b/sys/dev/raidframe/rf_copyback.h deleted file mode 100644 index 67da842..0000000 --- a/sys/dev/raidframe/rf_copyback.h +++ /dev/null @@ -1,61 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_copyback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * rf_copyback.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_COPYBACK_H_ -#define _RF__RF_COPYBACK_H_ - -#include <dev/raidframe/rf_types.h> - -typedef struct RF_CopybackDesc_s { - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; - RF_RowCol_t spRow; - RF_RowCol_t spCol; - int status; - RF_StripeNum_t stripeAddr; - RF_SectorCount_t sectPerSU; - RF_SectorCount_t sectPerStripe; - char *databuf; - RF_DiskQueueData_t *readreq; - RF_DiskQueueData_t *writereq; - struct timeval starttime; - RF_MCPair_t *mcpair; -} RF_CopybackDesc_t; - -extern int rf_copyback_in_progress; - -int rf_ConfigureCopyback(RF_ShutdownList_t ** listp); -void rf_CopybackReconstructedData(RF_Raid_t * raidPtr); -void rf_ContinueCopyback(RF_CopybackDesc_t * desc); - -#endif /* !_RF__RF_COPYBACK_H_ */ diff --git a/sys/dev/raidframe/rf_cvscan.c b/sys/dev/raidframe/rf_cvscan.c deleted file mode 100644 index b7c1026..0000000 --- a/sys/dev/raidframe/rf_cvscan.c +++ /dev/null @@ -1,441 +0,0 @@ -/* $NetBSD: rf_cvscan.c,v 1.5 1999/08/13 03:41:53 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************************* - * - * cvscan.c -- prioritized cvscan disk queueing code. - * - * Nov 9, 1994, adapted from raidSim version (MCH) - * - ******************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_cvscan.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> - -#define DO_CHECK_STATE(_hdr_) CheckCvscanState((_hdr_), __FILE__, __LINE__) - -#define pri_ok(p) ( ((p) == RF_IO_NORMAL_PRIORITY) || ((p) == RF_IO_LOW_PRIORITY)) - -static void -CheckCvscanState(RF_CvscanHeader_t * hdr, char *file, int line) -{ - long i, key; - RF_DiskQueueData_t *tmp; - - if (hdr->left != (RF_DiskQueueData_t *) NULL) - RF_ASSERT(hdr->left->sectorOffset < hdr->cur_block); - for (key = hdr->cur_block, i = 0, tmp = hdr->left; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->sectorOffset, i++, tmp = tmp->next) - RF_ASSERT(tmp->sectorOffset <= key - && tmp->priority == hdr->nxt_priority && pri_ok(tmp->priority)); - RF_ASSERT(i == hdr->left_cnt); - - for (key = hdr->cur_block, i = 0, tmp = hdr->right; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->sectorOffset, i++, tmp = tmp->next) { - RF_ASSERT(key <= tmp->sectorOffset); - RF_ASSERT(tmp->priority == hdr->nxt_priority); - 
RF_ASSERT(pri_ok(tmp->priority)); - } - RF_ASSERT(i == hdr->right_cnt); - - for (key = hdr->nxt_priority - 1, tmp = hdr->burner; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->priority, tmp = tmp->next) { - RF_ASSERT(tmp); - RF_ASSERT(hdr); - RF_ASSERT(pri_ok(tmp->priority)); - RF_ASSERT(key >= tmp->priority); - RF_ASSERT(tmp->priority < hdr->nxt_priority); - } -} - - - -static void -PriorityInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req) -{ - /* * insert block pointed to by req in to list whose first * entry is - * pointed to by the pointer that list_ptr points to * ie., list_ptr - * is a grandparent of the first entry */ - - for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && - (*list_ptr)->priority > req->priority; - list_ptr = &((*list_ptr)->next)) { - } - req->next = (*list_ptr); - (*list_ptr) = req; -} - - - -static void -ReqInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req, RF_CvscanArmDir_t order) -{ - /* * insert block pointed to by req in to list whose first * entry is - * pointed to by the pointer that list_ptr points to * ie., list_ptr - * is a grandparent of the first entry */ - - for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && - - ((order == rf_cvscan_RIGHT && (*list_ptr)->sectorOffset <= req->sectorOffset) - || (order == rf_cvscan_LEFT && (*list_ptr)->sectorOffset > req->sectorOffset)); - list_ptr = &((*list_ptr)->next)) { - } - req->next = (*list_ptr); - (*list_ptr) = req; -} - - - -static RF_DiskQueueData_t * -ReqDequeue(RF_DiskQueueData_t ** list_ptr) -{ - RF_DiskQueueData_t *ret = (*list_ptr); - if ((*list_ptr) != (RF_DiskQueueData_t *) NULL) { - (*list_ptr) = (*list_ptr)->next; - } - return (ret); -} - - - -static void -ReBalance(RF_CvscanHeader_t * hdr) -{ - /* DO_CHECK_STATE(hdr); */ - while (hdr->right != (RF_DiskQueueData_t *) NULL - && hdr->right->sectorOffset < hdr->cur_block) { - hdr->right_cnt--; - hdr->left_cnt++; - ReqInsert(&hdr->left, ReqDequeue(&hdr->right), rf_cvscan_LEFT); - } - /* 
DO_CHECK_STATE(hdr); */ -} - - - -static void -Transfer(RF_DiskQueueData_t ** to_list_ptr, RF_DiskQueueData_t ** from_list_ptr) -{ - RF_DiskQueueData_t *gp; - for (gp = (*from_list_ptr); gp != (RF_DiskQueueData_t *) NULL;) { - RF_DiskQueueData_t *p = gp->next; - PriorityInsert(to_list_ptr, gp); - gp = p; - } - (*from_list_ptr) = (RF_DiskQueueData_t *) NULL; -} - - - -static void -RealEnqueue(RF_CvscanHeader_t * hdr, RF_DiskQueueData_t * req) -{ - RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || req->priority == RF_IO_LOW_PRIORITY); - - DO_CHECK_STATE(hdr); - if (hdr->left_cnt == 0 && hdr->right_cnt == 0) { - hdr->nxt_priority = req->priority; - } - if (req->priority > hdr->nxt_priority) { - /* - ** dump all other outstanding requests on the back burner - */ - Transfer(&hdr->burner, &hdr->left); - Transfer(&hdr->burner, &hdr->right); - hdr->left_cnt = 0; - hdr->right_cnt = 0; - hdr->nxt_priority = req->priority; - } - if (req->priority < hdr->nxt_priority) { - /* - ** yet another low priority task! 
- */ - PriorityInsert(&hdr->burner, req); - } else { - if (req->sectorOffset < hdr->cur_block) { - /* this request is to the left of the current arms */ - ReqInsert(&hdr->left, req, rf_cvscan_LEFT); - hdr->left_cnt++; - } else { - /* this request is to the right of the current arms */ - ReqInsert(&hdr->right, req, rf_cvscan_RIGHT); - hdr->right_cnt++; - } - } - DO_CHECK_STATE(hdr); -} - - - -void -rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t * elem, int priority) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - RealEnqueue(hdr, elem /* req */ ); -} - - - -RF_DiskQueueData_t * -rf_CvscanDequeue(void *q_in) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - long range, i, sum_dist_left, sum_dist_right; - RF_DiskQueueData_t *ret; - RF_DiskQueueData_t *tmp; - - DO_CHECK_STATE(hdr); - - if (hdr->left_cnt == 0 && hdr->right_cnt == 0) - return ((RF_DiskQueueData_t *) NULL); - - range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); - for (i = 0, tmp = hdr->left, sum_dist_left = - ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_left += hdr->cur_block - tmp->sectorOffset; - } - for (i = 0, tmp = hdr->right, sum_dist_right = - ((hdr->direction == rf_cvscan_LEFT) ? 
range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_right += tmp->sectorOffset - hdr->cur_block; - } - - if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right) { - hdr->direction = rf_cvscan_LEFT; - hdr->cur_block = hdr->left->sectorOffset + hdr->left->numSector; - hdr->left_cnt = RF_MAX(hdr->left_cnt - 1, 0); - tmp = hdr->left; - ret = (ReqDequeue(&hdr->left)) /*->parent*/ ; - } else { - hdr->direction = rf_cvscan_RIGHT; - hdr->cur_block = hdr->right->sectorOffset + hdr->right->numSector; - hdr->right_cnt = RF_MAX(hdr->right_cnt - 1, 0); - tmp = hdr->right; - ret = (ReqDequeue(&hdr->right)) /*->parent*/ ; - } - ReBalance(hdr); - - if (hdr->left_cnt == 0 && hdr->right_cnt == 0 - && hdr->burner != (RF_DiskQueueData_t *) NULL) { - /* - ** restore low priority requests for next dequeue - */ - RF_DiskQueueData_t *burner = hdr->burner; - hdr->nxt_priority = burner->priority; - while (burner != (RF_DiskQueueData_t *) NULL - && burner->priority == hdr->nxt_priority) { - RF_DiskQueueData_t *next = burner->next; - RealEnqueue(hdr, burner); - burner = next; - } - hdr->burner = burner; - } - DO_CHECK_STATE(hdr); - return (ret); -} - - - -RF_DiskQueueData_t * -rf_CvscanPeek(void *q_in) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - long range, i, sum_dist_left, sum_dist_right; - RF_DiskQueueData_t *tmp, *headElement; - - DO_CHECK_STATE(hdr); - - if (hdr->left_cnt == 0 && hdr->right_cnt == 0) - headElement = NULL; - else { - range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); - for (i = 0, tmp = hdr->left, sum_dist_left = - ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_left += hdr->cur_block - tmp->sectorOffset; - } - for (i = 0, tmp = hdr->right, sum_dist_right = - ((hdr->direction == rf_cvscan_LEFT) ? 
range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_right += tmp->sectorOffset - hdr->cur_block; - } - - if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right) - headElement = hdr->left; - else - headElement = hdr->right; - } - return (headElement); -} - - - -/* -** CVSCAN( 1, 0 ) is Shortest Seek Time First (SSTF) -** lowest average response time -** CVSCAN( 1, infinity ) is SCAN -** lowest response time standard deviation -*/ - - -int -rf_CvscanConfigure() -{ - return (0); -} - - - -void * -rf_CvscanCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t * clList, - RF_ShutdownList_t ** listp) -{ - RF_CvscanHeader_t *hdr; - long range = 2; /* Currently no mechanism to change these */ - long penalty = sectPerDisk / 5; - - RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), clList); - bzero((char *) hdr, sizeof(RF_CvscanHeader_t)); - hdr->range_for_avg = RF_MAX(range, 1); - hdr->change_penalty = RF_MAX(penalty, 0); - hdr->direction = rf_cvscan_RIGHT; - hdr->cur_block = 0; - hdr->left_cnt = hdr->right_cnt = 0; - hdr->left = hdr->right = (RF_DiskQueueData_t *) NULL; - hdr->burner = (RF_DiskQueueData_t *) NULL; - DO_CHECK_STATE(hdr); - - return ((void *) hdr); -} - - -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) -/* PrintCvscanQueue is not used, so we ignore it... */ -#else -static void -PrintCvscanQueue(RF_CvscanHeader_t * hdr) -{ - RF_DiskQueueData_t *tmp; - - printf("CVSCAN(%d,%d) at %d going %s\n", - (int) hdr->range_for_avg, - (int) hdr->change_penalty, - (int) hdr->cur_block, - (hdr->direction == rf_cvscan_LEFT) ? 
"LEFT" : "RIGHT"); - printf("\tLeft(%d): ", hdr->left_cnt); - for (tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf("(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority); - printf("\n"); - printf("\tRight(%d): ", hdr->right_cnt); - for (tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf("(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority); - printf("\n"); - printf("\tBurner: "); - for (tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf("(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority); - printf("\n"); -} -#endif - - -/* promotes reconstruction accesses for the given stripeID to normal priority. - * returns 1 if an access was found and zero otherwise. Normally, we should - * only have one or zero entries in the burner queue, so execution time should - * be short. 
- */ -int -rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - RF_DiskQueueData_t *trailer = NULL, *tmp = hdr->burner, *tlist = NULL; - int retval = 0; - - DO_CHECK_STATE(hdr); - while (tmp) { /* handle entries at the front of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { - hdr->burner = tmp->next; - tmp->priority = RF_IO_NORMAL_PRIORITY; - tmp->next = tlist; - tlist = tmp; - tmp = hdr->burner; - } else - break; - } - if (tmp) { - trailer = tmp; - tmp = tmp->next; - } - while (tmp) { /* handle entries on the rest of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { - trailer->next = tmp->next; - tmp->priority = RF_IO_NORMAL_PRIORITY; - tmp->next = tlist; - tlist = tmp; /* insert on a temp queue */ - tmp = trailer->next; - } else { - trailer = tmp; - tmp = tmp->next; - } - } - while (tlist) { - retval++; - tmp = tlist->next; - RealEnqueue(hdr, tlist); - tlist = tmp; - } - RF_ASSERT(retval == 0 || retval == 1); - DO_CHECK_STATE((RF_CvscanHeader_t *) q_in); - return (retval); -} diff --git a/sys/dev/raidframe/rf_cvscan.h b/sys/dev/raidframe/rf_cvscan.h deleted file mode 100644 index 7f536a8..0000000 --- a/sys/dev/raidframe/rf_cvscan.h +++ /dev/null @@ -1,85 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_cvscan.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* -** Disk scheduling by CVSCAN( N, r ) -** -** Given a set of requests, partition them into one set on each -** side of the current arm position. The trick is to pick which -** side you are going to service next; once a side is picked you will -** service the closest request. -** Let there be n1 requests on one side and n2 requests on the other -** side. If one of n1 or n2 is zero, select the other side. -** If both n1 and n2 are nonzero, select a "range" for examination -** that is N' = min( n1, n2, N ). Average the distance from the -** current position to the nearest N' requests on each side giving -** d1 and d2. -** Suppose the last decision was to move toward set 2, then the -** current direction is toward set 2, and you will only switch to set -** 1 if d1+R < d2 where R is r*(total number of cylinders), r in [0,1]. -** -** I extend this by applying only to the set of requests that all -** share the same, highest priority level. 
-*/ - -#ifndef _RF__RF_CVSCAN_H_ -#define _RF__RF_CVSCAN_H_ - -#include <dev/raidframe/rf_diskqueue.h> - -typedef enum RF_CvscanArmDir_e { - rf_cvscan_LEFT, - rf_cvscan_RIGHT -} RF_CvscanArmDir_t; - -typedef struct RF_CvscanHeader_s { - long range_for_avg; /* CVSCAN param N */ - long change_penalty; /* CVSCAN param R */ - RF_CvscanArmDir_t direction; - RF_SectorNum_t cur_block; - int nxt_priority; - RF_DiskQueueData_t *left; - int left_cnt; - RF_DiskQueueData_t *right; - int right_cnt; - RF_DiskQueueData_t *burner; -} RF_CvscanHeader_t; - -int rf_CvscanConfigure(void); -void * -rf_CvscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); -RF_DiskQueueData_t *rf_CvscanDequeue(void *qptr); -RF_DiskQueueData_t *rf_CvscanPeek(void *qptr); -int -rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -#endif /* !_RF__RF_CVSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_dag.h b/sys/dev/raidframe/rf_dag.h deleted file mode 100644 index 15cd4a8..0000000 --- a/sys/dev/raidframe/rf_dag.h +++ /dev/null @@ -1,239 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dag.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II, Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * * - * dag.h -- header file for DAG-related data structures * - * * - ****************************************************************************/ - -#ifndef _RF__RF_DAG_H_ -#define _RF__RF_DAG_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_dagflags.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_memchunk.h> - -#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */ -#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */ -#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may possess */ - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#include <sys/buf.h> - -struct RF_PropHeader_s { /* structure for propagation of results */ - int resultNum; /* bind result # resultNum */ - int paramNum; /* to parameter # paramNum */ - RF_PropHeader_t *next; /* linked list for multiple results/params */ -}; - -typedef enum RF_NodeStatus_e { - rf_bwd1, /* node is ready for undo logging (backward - * error recovery only) */ - rf_bwd2, /* node has completed undo logging (backward - * error recovery only) */ - rf_wait, /* node is waiting to be executed */ - rf_fired, /* node is currently executing its do function */ - rf_good, /* node successfully completed execution of - * its do function */ - rf_bad, /* node failed to successfully execute its do - * function 
*/ - rf_skipped, /* not used anymore, used to imply a node was - * not executed */ - rf_recover, /* node is currently executing its undo - * function */ - rf_panic, /* node failed to successfully execute its - * undo function */ - rf_undone /* node successfully executed its undo - * function */ -} RF_NodeStatus_t; -/* - * These were used to control skipping a node. - * Now, these are only used as comments. - */ -typedef enum RF_AntecedentType_e { - rf_trueData, - rf_antiData, - rf_outputData, - rf_control -} RF_AntecedentType_t; -#define RF_DAG_PTRCACHESIZE 40 -#define RF_DAG_PARAMCACHESIZE 12 - -typedef RF_uint8 RF_DagNodeFlags_t; - -struct RF_DagNode_s { - RF_NodeStatus_t status; /* current status of this node */ - int (*doFunc) (RF_DagNode_t *); /* normal function */ - int (*undoFunc) (RF_DagNode_t *); /* func to remove effect of - * doFunc */ - int (*wakeFunc) (RF_DagNode_t *, int status); /* func called when the - * node completes an I/O */ - int numParams; /* number of parameters required by *funcPtr */ - int numResults; /* number of results produced by *funcPtr */ - int numAntecedents; /* number of antecedents */ - int numAntDone; /* number of antecedents which have finished */ - int numSuccedents; /* number of succedents */ - int numSuccFired; /* incremented when a succedent is fired - * during forward execution */ - int numSuccDone; /* incremented when a succedent finishes - * during rollBackward */ - int commitNode; /* boolean flag - if true, this is a commit - * node */ - RF_DagNode_t **succedents; /* succedents, array size - * numSuccedents */ - RF_DagNode_t **antecedents; /* antecedents, array size - * numAntecedents */ - RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each - * antecedent */ - void **results; /* array of results produced by *funcPtr */ - RF_DagParam_t *params; /* array of parameters required by *funcPtr */ - RF_PropHeader_t **propList; /* propagation list, size - * numSuccedents */ - RF_DagHeader_t *dagHdr; /* ptr to head 
of dag containing this node */ - void *dagFuncData; /* dag execution func uses this for whatever - * it wants */ - RF_DagNode_t *next; - int nodeNum; /* used by PrintDAG for debug only */ - int visited; /* used to avoid re-visiting nodes on DAG - * walks */ - /* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY THAT AFTER - * IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL */ - char *name; /* debug only */ - RF_DagNodeFlags_t flags;/* see below */ - RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance */ - RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance */ -}; -/* - * Bit values for flags field of RF_DagNode_t - */ -#define RF_DAGNODE_FLAG_NONE 0x00 -#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor - * before firing this node */ - -/* enable - DAG ready for normal execution, no errors encountered - * rollForward - DAG encountered an error after commit point, rolling forward - * rollBackward - DAG encountered an error prior to commit point, rolling backward - */ -typedef enum RF_DagStatus_e { - rf_enable, - rf_rollForward, - rf_rollBackward -} RF_DagStatus_t; -#define RF_MAX_HDR_SUCC 1 - -#define RF_MAXCHUNKS 10 - -struct RF_DagHeader_s { - RF_DagStatus_t status; /* status of this DAG */ - int numSuccedents; /* DAG may be a tree, i.e. 
may have > 1 root */ - int numCommitNodes; /* number of commit nodes in graph */ - int numCommits; /* number of commit nodes which have been - * fired */ - RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents, - * size numSuccedents */ - RF_DagHeader_t *next; /* ptr to allow a list of dags */ - RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed - * prior to freeing DAG */ - RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps - * to be freed */ - int nodeNum; /* used by PrintDAG for debug only */ - int numNodesCompleted; - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - - void (*cbFunc) (void *); /* function to call when the dag - * completes */ - void *cbArg; /* argument for cbFunc */ - char *creator; /* name of function used to create this dag */ - - RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG - * is for */ - void *bp; /* the bp for this I/O passed down from the - * file system. ignored outside kernel */ - - RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of - * memory to be retained upon - * DAG free for re-use */ - int chunkIndex; /* the idea is to avoid calls to alloc and - * free */ - - RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows - * SelectAlgorithm to merge memChunks - * from several dags */ - int xtraChunkIndex; /* number of ptrs to valid chunks */ - int xtraChunkCnt; /* number of ptrs to chunks allocated */ - -}; - -struct RF_DagList_s { - /* common info for a list of dags which will be fired sequentially */ - int numDags; /* number of dags in the list */ - int numDagsFired; /* number of dags in list which have initiated - * execution */ - int numDagsDone; /* number of dags in list which have completed - * execution */ - RF_DagHeader_t *dags; /* list of dags */ - RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */ - RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user - * info) */ -}; -/* resets a node so that it can be fired 
again */ -#define RF_ResetNode(_n_) { \ - (_n_)->status = rf_wait; \ - (_n_)->numAntDone = 0; \ - (_n_)->numSuccFired = 0; \ - (_n_)->numSuccDone = 0; \ - (_n_)->next = NULL; \ -} - -#define RF_ResetDagHeader(_h_) { \ - (_h_)->numNodesCompleted = 0; \ - (_h_)->numCommits = 0; \ - (_h_)->status = rf_enable; \ -} - -/* convenience macro for declaring a create dag function */ - -#define RF_CREATE_DAG_FUNC_DECL(_name_) \ -void _name_ ( \ - RF_Raid_t *raidPtr, \ - RF_AccessStripeMap_t *asmap, \ - RF_DagHeader_t *dag_h, \ - void *bp, \ - RF_RaidAccessFlags_t flags, \ - RF_AllocListElem_t *allocList) - -#endif /* !_RF__RF_DAG_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegrd.c b/sys/dev/raidframe/rf_dagdegrd.c deleted file mode 100644 index 6321db6..0000000 --- a/sys/dev/raidframe/rf_dagdegrd.c +++ /dev/null @@ -1,1132 +0,0 @@ -/* $NetBSD: rf_dagdegrd.c,v 1.7 2001/01/26 14:06:16 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagdegrd.c - * - * code for creating degraded read DAGs - */ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagdegrd.h> - - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. - * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. - * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. 
Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - -void -rf_CreateRaidFiveDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorRecoveryFuncs); -} - - -/****************************************************************************** - * - * DAG creation code begins here - */ - - -/****************************************************************************** - * Create a degraded read DAG for RAID level 1 - * - * Hdr -> Nil -> R(p/s)d -> Commit -> Trm - * - * The "Rd" node reads data from the surviving disk in the mirror pair - * Rpd - read of primary copy - * Rsd - read of secondary copy - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. 
disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidOneDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda; - int useMirror, i; - - useMirror = 0; - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 degraded read DAG]\n"); - } - dag_h->creator = "RaidOneDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 0) - useMirror = RF_FALSE; - else - useMirror = RF_TRUE; - - /* total number of nodes = 1 + (block + commit + terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - rdNode = &nodes[i]; - i++; - blockNode = &nodes[i]; - i++; - commitNode = &nodes[i]; - i++; - termNode = &nodes[i]; - i++; - - /* this dag can not commit until the commit node is reached. 
errors - * prior to the commit point imply the dag has failed and must be - * retried */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - pda = asmap->physInfo; - RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - /* initialize the data node */ - if (!useMirror) { - /* read primary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); - rdNode->params[0].p = pda; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to rdnode */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(rdNode->numAntecedents == 1); - blockNode->succedents[0] = rdNode; - rdNode->antecedents[0] = blockNode; - rdNode->antType[0] = rf_control; - - /* connect rdnode to 
commit node */ - RF_ASSERT(rdNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - rdNode->succedents[0] = commitNode; - commitNode->antecedents[0] = rdNode; - commitNode->antType[0] = rf_control; - - /* connect commit node to terminator */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; -} - - - -/****************************************************************************** - * - * creates a DAG to perform a degraded-mode read of data within one stripe. - * This DAG is as follows: - * - * Hdr -> Block -> Rud -> Xor -> Cmt -> T - * -> Rrd -> - * -> Rp --> - * - * Each R node is a successor of the L node - * One successor arc from each R node goes to C, and the other to X - * There is one Rud for each chunk of surviving user data requested by the - * user, and one Rrd for each chunk of surviving user data _not_ being read by - * the user - * R = read, ud = user data, rd = recovery (surviving) data, p = parity - * X = XOR, C = Commit, T = terminate - * - * The block node guarantees a single source node. - * - * Note: The target buffer for the XOR node is set to the actual user buffer - * where the failed data is supposed to end up. This buffer is zero'd by the - * code here. Thus, if you create a degraded read dag, use it, and then - * re-use, you have to be sure to zero the target buffer prior to the re-use. - * - * The recfunc argument at the end specifies the name and function used for - * the redundancy - * recovery function. 
- * - *****************************************************************************/ - -void -rf_CreateDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * recFunc) -{ - RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; - RF_DagNode_t *commitNode, *rpNode, *termNode; - int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; - int j, paramNum; - RF_SectorCount_t sectorsPerSU; - RF_ReconUnitNum_t which_ru; - char *overlappingPDAs;/* a temporary array of flags */ - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_PhysDiskAddr_t *pda, *parityPDA; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *failedPDA; - RF_RaidLayout_t *layoutPtr; - char *rpBuf; - - layoutPtr = &(raidPtr->Layout); - /* failedPDA points to the pda within the asm that targets the failed - * disk */ - failedPDA = asmap->failedPDAs[0]; - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, - asmap->raidAddress, &which_ru); - sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - - if (rf_dagDebug) { - printf("[Creating degraded read DAG]\n"); - } - RF_ASSERT(asmap->numDataFailed == 1); - dag_h->creator = "DegradedReadDAG"; - - /* - * generate two ASMs identifying the surviving data we need - * in order to recover the lost data - */ - - /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, - &rpBuf, overlappingPDAs, allocList); - - /* - * create all the nodes at once - * - * -1 because no access is generated for the failed pda - */ - nRudNodes = asmap->numStripeUnitsAccessed - 1; - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? 
new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); - nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, - * Rrd */ - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), - allocList); - i = 0; - blockNode = &nodes[i]; - i++; - commitNode = &nodes[i]; - i++; - xorNode = &nodes[i]; - i++; - rpNode = &nodes[i]; - i++; - termNode = &nodes[i]; - i++; - rudNodes = &nodes[i]; - i += nRudNodes; - rrdNodes = &nodes[i]; - i += nRrdNodes; - RF_ASSERT(i == nNodes); - - /* initialize nodes */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - /* this dag can not commit until the commit node is reached errors - * prior to the commit point imply the dag has failed */ - dag_h->numSuccedents = 1; - - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, - NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, - recFunc->SimpleName, allocList); - - /* fill in the Rud nodes */ - for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { - if (pda == failedPDA) { - i--; - continue; - } - rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rud", allocList); - RF_ASSERT(pda); - rudNodes[i].params[0].p = pda; - rudNodes[i].params[1].p = pda->bufPtr; - rudNodes[i].params[2].v = parityStripeID; - rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* fill in the Rrd nodes */ - i = 0; - if (new_asm_h[0]) { - for (pda = new_asm_h[0]->stripeMap->physInfo; - i < 
new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - i++, pda = pda->next) { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i].params[0].p = pda; - rrdNodes[i].params[1].p = pda->bufPtr; - rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - if (new_asm_h[1]) { - for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; - j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - j++, pda = pda->next) { - rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i + j].params[0].p = pda; - rrdNodes[i + j].params[1].p = pda->bufPtr; - rrdNodes[i + j].params[2].v = parityStripeID; - rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - /* make a PDA for the parity unit */ - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->parityInfo->row; - parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - /* initialize the Rp node */ - rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); - rpNode->params[0].p = parityPDA; - rpNode->params[1].p = rpBuf; - rpNode->params[2].v = parityStripeID; - rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - /* - * the last and nastiest step is to assign all - * the parameters of the Xor node - */ - paramNum = 0; - for (i = 0; i < nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ - xorNode->params[paramNum++] 
= rrdNodes[i].params[0]; - xorNode->params[paramNum++] = rrdNodes[i].params[1]; - } - for (i = 0; i < nRudNodes; i++) { - /* any Rud nodes that overlap the failed access need to be - * xored in */ - if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *) rudNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); - xorNode->params[paramNum++].p = pda; - xorNode->params[paramNum++].p = pda->bufPtr; - } - } - RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); - - /* install parity pda as last set of params to be xor'd */ - xorNode->params[paramNum++].p = parityPDA; - xorNode->params[paramNum++].p = rpBuf; - - /* - * the last 2 params to the recovery xor node are - * the failed PDA and the raidPtr - */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = raidPtr; - RF_ASSERT(paramNum == 2 * nXorBufs + 2); - - /* - * The xor node uses results[0] as the target buffer. - * Set pointer and zero the buffer. In the kernel, this - * may be a user buffer in which case we have to remap it. 
- */ - xorNode->results[0] = failedPDA->bufPtr; - RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr, - failedPDA->numSector)); - - /* connect nodes to form graph */ - /* connect the header to the block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect the block node to the read nodes */ - RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); - RF_ASSERT(rpNode->numAntecedents == 1); - blockNode->succedents[0] = rpNode; - rpNode->antecedents[0] = blockNode; - rpNode->antType[0] = rf_control; - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - blockNode->succedents[1 + i] = &rrdNodes[i]; - rrdNodes[i].antecedents[0] = blockNode; - rrdNodes[i].antType[0] = rf_control; - } - for (i = 0; i < nRudNodes; i++) { - RF_ASSERT(rudNodes[i].numSuccedents == 1); - blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i]; - rudNodes[i].antecedents[0] = blockNode; - rudNodes[i].antType[0] = rf_control; - } - - /* connect the read nodes to the xor node */ - RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); - RF_ASSERT(rpNode->numSuccedents == 1); - rpNode->succedents[0] = xorNode; - xorNode->antecedents[0] = rpNode; - xorNode->antType[0] = rf_trueData; - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - rrdNodes[i].succedents[0] = xorNode; - xorNode->antecedents[1 + i] = &rrdNodes[i]; - xorNode->antType[1 + i] = rf_trueData; - } - for (i = 0; i < nRudNodes; i++) { - RF_ASSERT(rudNodes[i].numSuccedents == 1); - rudNodes[i].succedents[0] = xorNode; - xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i]; - xorNode->antType[1 + nRrdNodes + i] = rf_trueData; - } - - /* connect the xor node to the commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - 
commitNode->antType[0] = rf_control; - - /* connect the termNode to the commit node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antType[0] = rf_control; - termNode->antecedents[0] = commitNode; -} - -#if (RF_INCLUDE_CHAINDECLUSTER > 0) -/****************************************************************************** - * Create a degraded read DAG for Chained Declustering - * - * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm - * - * The "Rd" node reads data from the surviving disk in the mirror pair - * Rpd - read of primary copy - * Rsd - read of secondary copy - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidCDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; - RF_StripeNum_t parityStripeID; - int useMirror, i, shiftable; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda; - - if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { - shiftable = RF_TRUE; - } else { - shiftable = RF_FALSE; - } - useMirror = 0; - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - - if (rf_dagDebug) { - printf("[Creating RAID C degraded read DAG]\n"); - } - dag_h->creator = "RaidCDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 0) - useMirror = RF_FALSE; - else - useMirror = RF_TRUE; - - /* total number of nodes = 1 + (block + commit + 
terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - rdNode = &nodes[i]; - i++; - blockNode = &nodes[i]; - i++; - commitNode = &nodes[i]; - i++; - termNode = &nodes[i]; - i++; - - /* - * This dag can not commit until the commit node is reached. - * Errors prior to the commit point imply the dag has failed - * and must be retried. - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - pda = asmap->physInfo; - RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - /* initialize the data node */ - if (!useMirror) { - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); - if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { - /* shift this read to the next disk in line */ - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } else { - /* read primary copy */ - rdNode->params[0].p = pda; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); - 
rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to rdnode */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(rdNode->numAntecedents == 1); - blockNode->succedents[0] = rdNode; - rdNode->antecedents[0] = blockNode; - rdNode->antType[0] = rf_control; - - /* connect rdnode to commit node */ - RF_ASSERT(rdNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - rdNode->succedents[0] = commitNode; - commitNode->antecedents[0] = rdNode; - commitNode->antType[0] = rf_control; - - /* connect commit node to terminator */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; -} -#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) -/* - * XXX move this elsewhere? 
- */ -void -rf_DD_GenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t ** pdap, - int *nNodep, - RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, - RF_AllocListElem_t * allocList) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk, i; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; - RF_SectorNum_t suoff, suend; - unsigned firstDataCol, napdas, count; - RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; - RF_PhysDiskAddr_t *pda_p; - RF_PhysDiskAddr_t *phys_p; - RF_RaidAddr_t sosAddr; - - /* determine how many pda's we will have to generate per unaccess - * stripe. If there is only one failed data unit, it is one; if two, - * possibly two, depending wether they overlap. */ - - fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); - fone_end = fone_start + fone->numSector; - -#define CONS_PDA(if,start,num) \ - pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ - pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ - pda_p->numSector = num; \ - pda_p->next = NULL; \ - RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) - - if (asmap->numDataFailed == 1) { - PDAPerDisk = 1; - state = 1; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - /* build p */ - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - /* build q */ - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } else { - ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); - ftwo_end = ftwo_start + ftwo->numSector; - if (fone->numSector + ftwo->numSector > secPerSU) { - PDAPerDisk = 1; - state = 2; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), 
(RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_Q; - } else { - PDAPerDisk = 2; - state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - pda_p++; - CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } - } - /* figure out number of nonaccessed pda */ - napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); - *nPQNodep = PDAPerDisk; - - /* sweep over the over accessed pda's, figuring out the number of - * additional pda's to generate. 
Of course, skip the failed ones */ - - count = 0; - for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { - if ((pda_p == fone) || (pda_p == ftwo)) - continue; - suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); - suend = suoff + pda_p->numSector; - switch (state) { - case 1: /* one failed PDA to overlap */ - /* if a PDA doesn't contain the failed unit, it can - * only miss the start or end, not both */ - if ((suoff > fone_start) || (suend < fone_end)) - count++; - break; - case 2: /* whole stripe */ - if (suoff) /* leak at begining */ - count++; - if (suend < numDataCol) /* leak at end */ - count++; - break; - case 3: /* two disjoint units */ - if ((suoff > fone_start) || (suend < fone_end)) - count++; - if ((suoff > ftwo_start) || (suend < ftwo_end)) - count++; - break; - default: - RF_PANIC(); - } - } - - napdas += count; - *nNodep = napdas; - if (napdas == 0) - return; /* short circuit */ - - /* allocate up our list of pda's */ - - RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - *pdap = pda_p; - - /* linkem together */ - for (i = 0; i < (napdas - 1); i++) - pda_p[i].next = pda_p + (i + 1); - - /* march through the one's up to the first accessed disk */ - firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < firstDataCol; i++) { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - 
RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); - } - pda_p++; - } - - /* march through the touched stripe units */ - for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { - if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) - continue; - suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); - suend = suoff + phys_p->numSector; - switch (state) { - case 1: /* single buffer */ - if (suoff > fone_start) { - RF_ASSERT(suend >= fone_end); - /* The data read starts after the mapped - * access, snip off the begining */ - pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - if (suend < fone_end) { - RF_ASSERT(suoff <= fone_start); - /* The data read stops before the end of the - * failed access, extend */ - pda_p->numSector = 
fone_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - break; - case 2: /* whole stripe unit */ - RF_ASSERT((suoff == 0) || (suend == secPerSU)); - if (suend < secPerSU) { /* short read, snip from end - * on */ - pda_p->numSector = secPerSU - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } else - if (suoff > 0) { /* short at front */ - pda_p->numSector = suoff; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - break; - case 3: /* two nonoverlapping failures */ - if ((suoff > fone_start) || (suend < fone_end)) { - if (suoff > fone_start) { - RF_ASSERT(suend >= fone_end); - /* The data read starts after the - * mapped access, snip off the - * begining */ - pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - if (suend < fone_end) { - RF_ASSERT(suoff <= fone_start); - /* The data read stops before the end - * of the failed access, extend */ - pda_p->numSector = fone_end - suend; - pda_p->raidAddress 
= sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - } - if ((suoff > ftwo_start) || (suend < ftwo_end)) { - if (suoff > ftwo_start) { - RF_ASSERT(suend >= ftwo_end); - /* The data read starts after the - * mapped access, snip off the - * begining */ - pda_p->numSector = suoff - ftwo_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - if (suend < ftwo_end) { - RF_ASSERT(suoff <= ftwo_start); - /* The data read stops before the end - * of the failed access, extend */ - pda_p->numSector = ftwo_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - } - break; - default: - RF_PANIC(); - } - } - - /* after the last accessed disk */ - for (; i < numDataCol; i++) { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); - } - pda_p++; - } - - RF_ASSERT(pda_p - *pdap == napdas); - return; -} -#define INIT_DISK_NODE(node,name) \ -rf_InitNode(node, rf_wait, RF_FALSE, 
rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ -(node)->succedents[0] = unblockNode; \ -(node)->succedents[1] = recoveryNode; \ -(node)->antecedents[0] = blockNode; \ -(node)->antType[0] = rf_control - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -void -rf_DoubleDegRead( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - char *redundantReadNodeName, - char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, - *unblockNode, *rpNodes, *rqNodes, *termNode; - RF_PhysDiskAddr_t *pda, *pqPDAs; - RF_PhysDiskAddr_t *npdas; - int nNodes, nRrdNodes, nRudNodes, i; - RF_ReconUnitNum_t which_ru; - int nReadNodes, nPQNodes; - RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; - RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating Double Degraded Read DAG]\n"); - rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); - - nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); - nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes; - nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; - - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - recoveryNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - rudNodes = &nodes[i]; - i += nRudNodes; - rrdNodes = &nodes[i]; - i += 
nRrdNodes; - rpNodes = &nodes[i]; - i += nPQNodes; - rqNodes = &nodes[i]; - i += nPQNodes; - RF_ASSERT(i == nNodes); - - dag_h->numSuccedents = 1; - dag_h->succedents[0] = blockNode; - dag_h->creator = "DoubleDegRead"; - dag_h->numCommits = 0; - dag_h->numCommitNodes = 1; /* unblock */ - - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - termNode->antecedents[1] = recoveryNode; - termNode->antType[1] = rf_control; - - /* init the block and unblock nodes */ - /* The block node has all nodes except itself, unblock and recovery as - * successors. Similarly for predecessors of the unblock. */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); - - for (i = 0; i < nReadNodes; i++) { - blockNode->succedents[i] = rudNodes + i; - unblockNode->antecedents[i] = rudNodes + i; - unblockNode->antType[i] = rf_control; - } - unblockNode->succedents[0] = termNode; - - /* The recovery node has all the reads as predecessors, and the term - * node as successors. It gets a pda as a param from each of the read - * nodes plus the raidPtr. For each failed unit is has a result pda. 
*/ - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - 1, /* succesors */ - nReadNodes, /* preds */ - nReadNodes + 2, /* params */ - asmap->numDataFailed, /* results */ - dag_h, recoveryNodeName, allocList); - - recoveryNode->succedents[0] = termNode; - for (i = 0; i < nReadNodes; i++) { - recoveryNode->antecedents[i] = rudNodes + i; - recoveryNode->antType[i] = rf_trueData; - } - - /* build the read nodes, then come back and fill in recovery params - * and results */ - pda = asmap->physInfo; - for (i = 0; i < nRudNodes; pda = pda->next) { - if ((pda == failedPDA) || (pda == failedPDAtwo)) - continue; - INIT_DISK_NODE(rudNodes + i, "Rud"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rudNodes[i], pda); - i++; - } - - pda = npdas; - for (i = 0; i < nRrdNodes; i++, pda = pda->next) { - INIT_DISK_NODE(rrdNodes + i, "Rrd"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rrdNodes[i], pda); - } - - /* redundancy pdas */ - pda = pqPDAs; - INIT_DISK_NODE(rpNodes, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[0], pda); - pda++; - INIT_DISK_NODE(rqNodes, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[0], pda); - if (nPQNodes == 2) { - pda++; - INIT_DISK_NODE(rpNodes + 1, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[1], pda); - pda++; - INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[1], pda); - } - /* fill in recovery node params */ - for (i = 0; i < nReadNodes; i++) - recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ - recoveryNode->params[i++].p = (void *) raidPtr; - recoveryNode->params[i++].p = (void *) asmap; - recoveryNode->results[0] = failedPDA; - if (asmap->numDataFailed == 2) - recoveryNode->results[1] = failedPDAtwo; - - /* zero fill the target data buffers? 
*/ -} - -#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */ diff --git a/sys/dev/raidframe/rf_dagdegrd.h b/sys/dev/raidframe/rf_dagdegrd.h deleted file mode 100644 index 2e899d8..0000000 --- a/sys/dev/raidframe/rf_dagdegrd.h +++ /dev/null @@ -1,64 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagdegrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#ifndef _RF__RF_DAGDEGRD_H_ -#define _RF__RF_DAGDEGRD_H_ - -#include <dev/raidframe/rf_types.h> - -/* degraded read DAG creation routines */ -void -rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateRaidOneDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * recFunc); -void -rf_CreateRaidCDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_DD_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, - int *nNodep, RF_PhysDiskAddr_t ** pqpdap, int *nPQNodep, - RF_AllocListElem_t * allocList); -void -rf_DoubleDegRead(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, char *redundantReadNodeName, - char *recoveryNodeName, int (*recovFunc) (RF_DagNode_t *)); - -#endif /* !_RF__RF_DAGDEGRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegwr.c b/sys/dev/raidframe/rf_dagdegwr.c deleted file mode 100644 index 70e0db6..0000000 --- a/sys/dev/raidframe/rf_dagdegwr.c +++ /dev/null @@ -1,846 +0,0 @@ -/* $NetBSD: rf_dagdegwr.c,v 1.6 2001/01/26 04:05:08 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. 
Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagdegwr.c - * - * code for creating degraded write DAGs - * - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagdegwr.h> - - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. 
- * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. - * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - -static -RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG) -{ - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, - flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); -} - -void -rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; - - RF_ASSERT(asmap->numDataFailed == 1); - dag_h->creator = "DegradedWriteDAG"; - - /* if the access writes only a portion of the failed unit, and also - * writes some portion of at least one surviving unit, we create two - * DAGs, one for the failed component and one for the non-failed - * component, and do them sequentially. Note that the fact that we're - * accessing only a portion of the failed unit indicates that the - * access either starts or ends in the failed unit, and hence we need - * create only two dags. This is inefficient in that the same data or - * parity can get read and written twice using this structure. I need - * to fix this to do the access all at once. 
*/ - RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)); - rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList); -} - - - -/****************************************************************************** - * - * DAG creation code begins here - */ - - - -/****************************************************************************** - * - * CommonCreateSimpleDegradedWriteDAG -- creates a DAG to do a degraded-mode - * write, which is as follows - * - * / {Wnq} --\ - * hdr -> blockNode -> Rod -> Xor -> Cmt -> Wnp ----> unblock -> term - * \ {Rod} / \ Wnd ---/ - * \ {Wnd} -/ - * - * commit nodes: Xor, Wnd - * - * IMPORTANT: - * This DAG generator does not work for double-degraded archs since it does not - * generate Q - * - * This dag is essentially identical to the large-write dag, except that the - * write to the failed data unit is suppressed. - * - * IMPORTANT: this dag does not work in the case where the access writes only - * a portion of the failed unit, and also writes some portion of at least one - * surviving SU. this case is handled in CreateDegradedWriteDAG above. - * - * The block & unblock nodes are leftovers from a previous version. They - * do nothing, but I haven't deleted them because it would be a tremendous - * effort to put them back in. - * - * This dag is used whenever a one of the data units in a write has failed. - * If it is the parity unit that failed, the nonredundant write dag (below) - * is used. 
- *****************************************************************************/ - -void -rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, nfaults, redFunc, allowBufferRecycle) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; - int nfaults; - int (*redFunc) (RF_DagNode_t *); - int allowBufferRecycle; -{ - int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, - rdnodesFaked; - RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode; - RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode; - RF_SectorCount_t sectorsPerSU; - RF_ReconUnitNum_t which_ru; - char *xorTargetBuf = NULL; /* the target buffer for the XOR - * operation */ - char *overlappingPDAs;/* a temporary array of flags */ - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_PhysDiskAddr_t *pda, *parityPDA; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *failedPDA; - RF_RaidLayout_t *layoutPtr; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, - &which_ru); - sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - /* failedPDA points to the pda within the asm that targets the failed - * disk */ - failedPDA = asmap->failedPDAs[0]; - - if (rf_dagDebug) - printf("[Creating degraded-write DAG]\n"); - - RF_ASSERT(asmap->numDataFailed == 1); - dag_h->creator = "SimpleDegradedWriteDAG"; - - /* - * Generate two ASMs identifying the surviving data - * we need in order to recover the lost data. 
- */ - /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, - &nXorBufs, NULL, overlappingPDAs, allocList); - - /* create all the nodes at once */ - nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is - * generated for the - * failed pda */ - - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); - /* - * XXX - * - * There's a bug with a complete stripe overwrite- that means 0 reads - * of old data, and the rest of the DAG generation code doesn't like - * that. A release is coming, and I don't wanna risk breaking a critical - * DAG generator, so here's what I'm gonna do- if there's no read nodes, - * I'm gonna fake there being a read node, and I'm gonna swap in a - * no-op node in its place (to make all the link-up code happy). - * This should be fixed at some point. 
--jimz - */ - if (nRrdNodes == 0) { - nRrdNodes = 1; - rdnodesFaked = 1; - } else { - rdnodesFaked = 0; - } - /* lock, unlock, xor, Wnd, Rrd, W(nfaults) */ - nNodes = 5 + nfaults + nWndNodes + nRrdNodes; - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - xorNode = &nodes[i]; - i += 1; - wnpNode = &nodes[i]; - i += 1; - wndNodes = &nodes[i]; - i += nWndNodes; - rrdNodes = &nodes[i]; - i += nRrdNodes; - if (nfaults == 2) { - wnqNode = &nodes[i]; - i += 1; - } else { - wnqNode = NULL; - } - RF_ASSERT(i == nNodes); - - /* this dag can not commit until all rrd and xor Nodes have completed */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - RF_ASSERT(nRrdNodes > 0); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", allocList); - - /* - * Fill in the Rrd nodes. If any of the rrd buffers are the same size as - * the failed buffer, save a pointer to it so we can use it as the target - * of the XOR. The pdas in the rrd nodes have been range-restricted, so if - * a buffer is the same size as the failed buffer, it must also be at the - * same alignment within the SU. 
- */ - i = 0; - if (new_asm_h[0]) { - for (i = 0, pda = new_asm_h[0]->stripeMap->physInfo; - i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - i++, pda = pda->next) { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i].params[0].p = pda; - rrdNodes[i].params[1].p = pda->bufPtr; - rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - /* i now equals the number of stripe units accessed in new_asm_h[0] */ - if (new_asm_h[1]) { - for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; - j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - j++, pda = pda->next) { - rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i + j].params[0].p = pda; - rrdNodes[i + j].params[1].p = pda->bufPtr; - rrdNodes[i + j].params[2].v = parityStripeID; - rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - if (allowBufferRecycle && (pda->numSector == failedPDA->numSector)) - xorTargetBuf = pda->bufPtr; - } - } - if (rdnodesFaked) { - /* - * This is where we'll init that fake noop read node - * (XXX should the wakeup func be different?) - */ - rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "RrN", allocList); - } - /* - * Make a PDA for the parity unit. The parity PDA should start at - * the same offset into the SU as the failed PDA. - */ - /* Danner comment: I don't think this copy is really necessary. We are - * in one of two cases here. (1) The entire failed unit is written. - * Then asmap->parityInfo will describe the entire parity. (2) We are - * only writing a subset of the failed unit and nothing else. 
Then the - * asmap->parityInfo describes the failed unit and the copy can also - * be avoided. */ - - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->parityInfo->row; - parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - if (!xorTargetBuf) { - RF_CallocAndAdd(xorTargetBuf, 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); - } - /* init the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = parityPDA; - wnpNode->params[1].p = xorTargetBuf; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - /* fill in the Wnq Node */ - if (nfaults == 2) { - { - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), - (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->qInfo->row; - parityPDA->col = asmap->qInfo->col; - parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = parityPDA; - RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - /* fill in the Wnd nodes */ - for (pda = asmap->physInfo, i = 0; i < nWndNodes; i++, pda = pda->next) { - if (pda == failedPDA) { - i--; - 
continue; - } - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* fill in the results of the xor node */ - xorNode->results[0] = xorTargetBuf; - - /* fill in the params of the xor node */ - - paramNum = 0; - if (rdnodesFaked == 0) { - for (i = 0; i < nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ - xorNode->params[paramNum++] = rrdNodes[i].params[0]; - xorNode->params[paramNum++] = rrdNodes[i].params[1]; - } - } - for (i = 0; i < nWndNodes; i++) { - /* any Wnd nodes that overlap the failed access need to be - * xored in */ - if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *) wndNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); - xorNode->params[paramNum++].p = pda; - xorNode->params[paramNum++].p = pda->bufPtr; - } - } - RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); - - /* - * Install the failed PDA into the xor param list so that the - * new data gets xor'd in. - */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = failedPDA->bufPtr; - - /* - * The last 2 params to the recovery xor node are always the failed - * PDA and the raidPtr. install the failedPDA even though we have just - * done so above. This allows us to use the same XOR function for both - * degraded reads and degraded writes. 
- */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = raidPtr; - RF_ASSERT(paramNum == 2 * nXorBufs + 2); - - /* - * Code to link nodes begins here - */ - - /* link header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* link block node to rd nodes */ - RF_ASSERT(blockNode->numSuccedents == nRrdNodes); - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rrdNodes[i]; - rrdNodes[i].antecedents[0] = blockNode; - rrdNodes[i].antType[0] = rf_control; - } - - /* link read nodes to xor node */ - RF_ASSERT(xorNode->numAntecedents == nRrdNodes); - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - rrdNodes[i].succedents[0] = xorNode; - xorNode->antecedents[i] = &rrdNodes[i]; - xorNode->antType[i] = rf_trueData; - } - - /* link xor node to commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - commitNode->antType[0] = rf_control; - - /* link commit node to wnd nodes */ - RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = commitNode; - wndNodes[i].antType[0] = rf_control; - } - - /* link the commit node to wnp, wnq nodes */ - RF_ASSERT(wnpNode->numAntecedents == 1); - commitNode->succedents[nWndNodes] = wnpNode; - wnpNode->antecedents[0] = commitNode; - wnpNode->antType[0] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - commitNode->succedents[nWndNodes + 1] = wnqNode; - wnqNode->antecedents[0] = commitNode; - wnqNode->antType[0] = rf_control; - } - /* link write new data nodes to unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults)); - for (i = 0; i < 
nWndNodes; i++) { - RF_ASSERT(wndNodes[i].numSuccedents == 1); - wndNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNodes[i]; - unblockNode->antType[i] = rf_control; - } - - /* link write new parity node to unblock node */ - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes] = wnpNode; - unblockNode->antType[nWndNodes] = rf_control; - - /* link write new q node to unblock node */ - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes + 1] = wnqNode; - unblockNode->antType[nWndNodes + 1] = rf_control; - } - /* link unblock node to term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} -#define CONS_PDA(if,start,num) \ - pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ - pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ - pda_p->numSector = num; \ - pda_p->next = NULL; \ - RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) -#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0) -void -rf_WriteGenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t ** pdap, - int *nNodep, - RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, - RF_AllocListElem_t * allocList) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk, i; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; - unsigned napdas; - RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; - RF_PhysDiskAddr_t *pda_p; - RF_RaidAddr_t sosAddr; - - /* 
determine how many pda's we will have to generate per unaccess - * stripe. If there is only one failed data unit, it is one; if two, - * possibly two, depending wether they overlap. */ - - fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); - fone_end = fone_start + fone->numSector; - - if (asmap->numDataFailed == 1) { - PDAPerDisk = 1; - state = 1; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - /* build p */ - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - /* build q */ - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } else { - ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); - ftwo_end = ftwo_start + ftwo->numSector; - if (fone->numSector + ftwo->numSector > secPerSU) { - PDAPerDisk = 1; - state = 2; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_Q; - } else { - PDAPerDisk = 2; - state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - pda_p++; - CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } - } - /* figure out number of nonaccessed pda */ - napdas = PDAPerDisk * (numDataCol - 2); - *nPQNodep = PDAPerDisk; - - *nNodep = napdas; - if (napdas == 0) - return; /* short circuit */ - - /* allocate up our list of pda's */ - - RF_CallocAndAdd(pda_p, napdas, 
sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - *pdap = pda_p; - - /* linkem together */ - for (i = 0; i < (napdas - 1); i++) - pda_p[i].next = pda_p + (i + 1); - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < numDataCol; i++) { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); - } - pda_p++; - } - - RF_ASSERT(pda_p - *pdap == napdas); - return; -} -#define DISK_NODE_PDA(node) ((node)->params[0].p) - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - 
(_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -void -rf_DoubleDegSmallWrite( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - char *redundantReadNodeName, - char *redundantWriteNodeName, - char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode, - *unblockNode, *rpNodes, *rqNodes, *wpNodes, *wqNodes, *termNode; - RF_PhysDiskAddr_t *pda, *pqPDAs; - RF_PhysDiskAddr_t *npdas; - int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i; - RF_ReconUnitNum_t which_ru; - int nPQNodes; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); - - /* simple small write case - First part looks like a reconstruct-read - * of the failed data units. Then a write of all data units not - * failed. */ - - - /* Hdr | ------Block- / / \ Rrd Rrd ... Rrd Rp Rq \ \ - * / -------PQ----- / \ \ Wud Wp WQ \ | / - * --Unblock- | T - * - * Rrd = read recovery data (potentially none) Wud = write user data - * (not incl. 
failed disks) Wp = Write P (could be two) Wq = Write Q - * (could be two) - * - */ - - rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); - - RF_ASSERT(asmap->numDataFailed == 1); - - nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); - nReadNodes = nRrdNodes + 2 * nPQNodes; - nWriteNodes = nWudNodes + 2 * nPQNodes; - nNodes = 4 + nReadNodes + nWriteNodes; - - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - blockNode = nodes; - unblockNode = blockNode + 1; - termNode = unblockNode + 1; - recoveryNode = termNode + 1; - rrdNodes = recoveryNode + 1; - rpNodes = rrdNodes + nRrdNodes; - rqNodes = rpNodes + nPQNodes; - wudNodes = rqNodes + nPQNodes; - wpNodes = wudNodes + nWudNodes; - wqNodes = wpNodes + nPQNodes; - - dag_h->creator = "PQ_DDSimpleSmallWrite"; - dag_h->numSuccedents = 1; - dag_h->succedents[0] = blockNode; - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - - /* init the block and unblock nodes */ - /* The block node has all the read nodes as successors */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); - for (i = 0; i < nReadNodes; i++) - blockNode->succedents[i] = rrdNodes + i; - - /* The unblock node has all the writes as successors */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList); - for (i = 0; i < nWriteNodes; i++) { - unblockNode->antecedents[i] = wudNodes + i; - unblockNode->antType[i] = rf_control; - } - unblockNode->succedents[0] = termNode; - -#define INIT_READ_NODE(node,name) \ - rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ - 
(node)->succedents[0] = recoveryNode; \ - (node)->antecedents[0] = blockNode; \ - (node)->antType[0] = rf_control; - - /* build the read nodes */ - pda = npdas; - for (i = 0; i < nRrdNodes; i++, pda = pda->next) { - INIT_READ_NODE(rrdNodes + i, "rrd"); - DISK_NODE_PARAMS(rrdNodes[i], pda); - } - - /* read redundancy pdas */ - pda = pqPDAs; - INIT_READ_NODE(rpNodes, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[0], pda); - pda++; - INIT_READ_NODE(rqNodes, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[0], pda); - if (nPQNodes == 2) { - pda++; - INIT_READ_NODE(rpNodes + 1, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[1], pda); - pda++; - INIT_READ_NODE(rqNodes + 1, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[1], pda); - } - /* the recovery node has all reads as precedessors and all writes as - * successors. It generates a result for every write P or write Q - * node. As parameters, it takes a pda per read and a pda per stripe - * of user data written. It also takes as the last params the raidPtr - * and asm. For results, it takes PDA for P & Q. 
*/ - - - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - nWriteNodes, /* succesors */ - nReadNodes, /* preds */ - nReadNodes + nWudNodes + 3, /* params */ - 2 * nPQNodes, /* results */ - dag_h, recoveryNodeName, allocList); - - - - for (i = 0; i < nReadNodes; i++) { - recoveryNode->antecedents[i] = rrdNodes + i; - recoveryNode->antType[i] = rf_control; - recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes + i); - } - for (i = 0; i < nWudNodes; i++) { - recoveryNode->succedents[i] = wudNodes + i; - } - recoveryNode->params[nReadNodes + nWudNodes].p = asmap->failedPDAs[0]; - recoveryNode->params[nReadNodes + nWudNodes + 1].p = raidPtr; - recoveryNode->params[nReadNodes + nWudNodes + 2].p = asmap; - - for (; i < nWriteNodes; i++) - recoveryNode->succedents[i] = wudNodes + i; - - pda = pqPDAs; - recoveryNode->results[0] = pda; - pda++; - recoveryNode->results[1] = pda; - if (nPQNodes == 2) { - pda++; - recoveryNode->results[2] = pda; - pda++; - recoveryNode->results[3] = pda; - } - /* fill writes */ -#define INIT_WRITE_NODE(node,name) \ - rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ - (node)->succedents[0] = unblockNode; \ - (node)->antecedents[0] = recoveryNode; \ - (node)->antType[0] = rf_control; - - pda = asmap->physInfo; - for (i = 0; i < nWudNodes; i++) { - INIT_WRITE_NODE(wudNodes + i, "Wd"); - DISK_NODE_PARAMS(wudNodes[i], pda); - recoveryNode->params[nReadNodes + i].p = DISK_NODE_PDA(wudNodes + i); - pda = pda->next; - } - /* write redundancy pdas */ - pda = pqPDAs; - INIT_WRITE_NODE(wpNodes, "Wp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wpNodes[0], pda); - pda++; - INIT_WRITE_NODE(wqNodes, "Wq"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wqNodes[0], pda); - if (nPQNodes == 2) { - pda++; - INIT_WRITE_NODE(wpNodes + 1, "Wp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wpNodes[1], pda); - pda++; - INIT_WRITE_NODE(wqNodes + 1, "Wq"); - 
RF_ASSERT(pda); - DISK_NODE_PARAMS(wqNodes[1], pda); - } -} -#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0) */ diff --git a/sys/dev/raidframe/rf_dagdegwr.h b/sys/dev/raidframe/rf_dagdegwr.h deleted file mode 100644 index 1e4b5e2..0000000 --- a/sys/dev/raidframe/rf_dagdegwr.h +++ /dev/null @@ -1,55 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagdegwr.h,v 1.4 1999/08/15 02:36:03 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - - -#ifndef _RF__RF_DAGDEGWR_H_ -#define _RF__RF_DAGDEGWR_H_ - -/* degraded write DAG creation routines */ -void rf_CreateDegradedWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); - -void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - -void rf_WriteGenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, - int *nNodep, RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, RF_AllocListElem_t * allocList); - -void rf_DoubleDegSmallWrite(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, char *redundantReadNodeName, - char *redundantWriteNodeName, char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)); - -#endif /* !_RF__RF_DAGDEGWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagffrd.c b/sys/dev/raidframe/rf_dagffrd.c deleted file mode 100644 index 13c0af7..0000000 --- a/sys/dev/raidframe/rf_dagffrd.c +++ /dev/null @@ -1,441 +0,0 @@ -/* $NetBSD: rf_dagffrd.c,v 1.4 2000/01/07 03:40:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagffrd.c - * - * code for creating fault-free read DAGs - * - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagffrd.h> - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. - * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. 
- * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - -void -rf_CreateFaultFreeReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_READ); -} - - -/****************************************************************************** - * - * DAG creation code begins here - */ - -/****************************************************************************** - * - * creates a DAG to perform a nonredundant read or write of data within one - * stripe. - * For reads, this DAG is as follows: - * - * /---- read ----\ - * Header -- Block ---- read ---- Commit -- Terminate - * \---- read ----/ - * - * For writes, this DAG is as follows: - * - * /---- write ----\ - * Header -- Commit ---- write ---- Block -- Terminate - * \---- write ----/ - * - * There is one disk node per stripe unit accessed, and all disk nodes are in - * parallel. - * - * Tricky point here: The first disk node (read or write) is created - * normally. Subsequent disk nodes are created by copying the first one, - * and modifying a few params. The "succedents" and "antecedents" fields are - * _not_ re-created in each node, but rather left pointing to the same array - * that was malloc'd when the first node was created. Thus, it's essential - * that when this DAG is freed, the succedents and antecedents fields be freed - * in ONLY ONE of the read nodes. This does not apply to the "params" field - * because it is recreated for each READ node. 
- * - * Note that normal-priority accesses do not need to be tagged with their - * parity stripe ID, because they will never be promoted. Hence, I've - * commented-out the code to do this, and marked it with UNNEEDED. - * - *****************************************************************************/ - -void -rf_CreateNonredundantDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) -{ - RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode; - RF_PhysDiskAddr_t *pda = asmap->physInfo; - int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int i, n, totalNumNodes; - char *name; - - n = asmap->numStripeUnitsAccessed; - dag_h->creator = "NonredundantDAG"; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - switch (type) { - case RF_IO_TYPE_READ: - doFunc = rf_DiskReadFunc; - undoFunc = rf_DiskReadUndoFunc; - name = "R "; - if (rf_dagDebug) - printf("[Creating non-redundant read DAG]\n"); - break; - case RF_IO_TYPE_WRITE: - doFunc = rf_DiskWriteFunc; - undoFunc = rf_DiskWriteUndoFunc; - name = "W "; - if (rf_dagDebug) - printf("[Creating non-redundant write DAG]\n"); - break; - default: - RF_PANIC(); - } - - /* - * For reads, the dag can not commit until the block node is reached. - * for writes, the dag commits immediately. 
- */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * Node count: - * 1 block node - * n data reads (or writes) - * 1 commit node - * 1 terminator node - */ - RF_ASSERT(n > 0); - totalNumNodes = n + 3; - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - diskNodes = &nodes[i]; - i += n; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == totalNumNodes); - - /* initialize nodes */ - switch (type) { - case RF_IO_TYPE_READ: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - break; - case RF_IO_TYPE_WRITE: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, n, 0, 0, dag_h, "Trm", allocList); - break; - default: - RF_PANIC(); - } - - for (i = 0; i < n; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, - 1, 1, 4, 0, dag_h, name, allocList); - diskNodes[i].params[0].p = pda; - diskNodes[i].params[1].p = pda->bufPtr; - /* parity stripe id is not necessary */ - diskNodes[i].params[2].v = 0; - diskNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - pda = pda->next; - } - - /* - * Connect nodes. 
- */ - - /* connect hdr to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (type == RF_IO_TYPE_READ) { - /* connecting a nonredundant read DAG */ - RF_ASSERT(blockNode->numSuccedents == n); - RF_ASSERT(commitNode->numAntecedents == n); - for (i = 0; i < n; i++) { - /* connect block node to each read node */ - RF_ASSERT(diskNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &diskNodes[i]; - diskNodes[i].antecedents[0] = blockNode; - diskNodes[i].antType[0] = rf_control; - - /* connect each read node to the commit node */ - RF_ASSERT(diskNodes[i].numSuccedents == 1); - diskNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &diskNodes[i]; - commitNode->antType[i] = rf_control; - } - /* connect the commit node to the term node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; - } else { - /* connecting a nonredundant write DAG */ - /* connect the block node to the commit node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - blockNode->succedents[0] = commitNode; - commitNode->antecedents[0] = blockNode; - commitNode->antType[0] = rf_control; - - RF_ASSERT(commitNode->numSuccedents == n); - RF_ASSERT(termNode->numAntecedents == n); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < n; i++) { - /* connect the commit node to each write node */ - RF_ASSERT(diskNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &diskNodes[i]; - diskNodes[i].antecedents[0] = commitNode; - diskNodes[i].antType[0] = rf_control; - - /* connect each write node to the term node */ - RF_ASSERT(diskNodes[i].numSuccedents == 1); - diskNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &diskNodes[i]; - termNode->antType[i] = rf_control; - } - } 
-} -/****************************************************************************** - * Create a fault-free read DAG for RAID level 1 - * - * Hdr -> Nil -> Rmir -> Cmt -> Trm - * - * The "Rmir" node schedules a read from the disk in the mirror pair with the - * shortest disk queue. the proper queue is selected at Rmir execution. this - * deferred mapping is unlike other archs in RAIDframe which generally fix - * mapping at DAG creation time. - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding read data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -static void -CreateMirrorReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int (*readfunc) (RF_DagNode_t * node)) -{ - RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode; - RF_PhysDiskAddr_t *data_pda = asmap->physInfo; - RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo; - int i, n, totalNumNodes; - - n = asmap->numStripeUnitsAccessed; - dag_h->creator = "RaidOneReadDAG"; - if (rf_dagDebug) { - printf("[Creating RAID level 1 read DAG]\n"); - } - /* - * This dag can not commit until the commit node is reached - * errors prior to the commit point imply the dag has failed. 
- */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * Node count: - * n data reads - * 1 block node - * 1 commit node - * 1 terminator node - */ - RF_ASSERT(n > 0); - totalNumNodes = n + 3; - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - readNodes = &nodes[i]; - i += n; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == totalNumNodes); - - /* initialize nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, - rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, - rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, - rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - for (i = 0; i < n; i++) { - RF_ASSERT(data_pda != NULL); - RF_ASSERT(parity_pda != NULL); - rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc, - rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h, - "Rmir", allocList); - readNodes[i].params[0].p = data_pda; - readNodes[i].params[1].p = data_pda->bufPtr; - /* parity stripe id is not necessary */ - readNodes[i].params[2].p = 0; - readNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - readNodes[i].params[4].p = parity_pda; - data_pda = data_pda->next; - parity_pda = parity_pda->next; - } - - /* - * Connect nodes - */ - - /* connect hdr to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to read nodes */ - RF_ASSERT(blockNode->numSuccedents == n); - for (i = 0; i < n; i++) { - RF_ASSERT(readNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &readNodes[i]; - readNodes[i].antecedents[0] = blockNode; - readNodes[i].antType[0] = rf_control; - } - - /* connect read nodes to commit node */ - 
RF_ASSERT(commitNode->numAntecedents == n); - for (i = 0; i < n; i++) { - RF_ASSERT(readNodes[i].numSuccedents == 1); - readNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &readNodes[i]; - commitNode->antType[i] = rf_control; - } - - /* connect commit node to term node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; -} - -void -rf_CreateMirrorIdleReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - rf_DiskReadMirrorIdleFunc); -} - -void -rf_CreateMirrorPartitionReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - rf_DiskReadMirrorPartitionFunc); -} diff --git a/sys/dev/raidframe/rf_dagffrd.h b/sys/dev/raidframe/rf_dagffrd.h deleted file mode 100644 index 6862a8d..0000000 --- a/sys/dev/raidframe/rf_dagffrd.h +++ /dev/null @@ -1,53 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagffrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DAGFFRD_H_ -#define _RF__RF_DAGFFRD_H_ - -#include <dev/raidframe/rf_types.h> - -/* fault-free read DAG creation routines */ -void -rf_CreateFaultFreeReadDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CreateNonredundantDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, RF_IoType_t type); -void -rf_CreateMirrorIdleReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateMirrorPartitionReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); - -#endif /* !_RF__RF_DAGFFRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagffwr.c b/sys/dev/raidframe/rf_dagffwr.c deleted file mode 100644 index 9216b29..0000000 --- a/sys/dev/raidframe/rf_dagffwr.c +++ /dev/null @@ -1,2131 +0,0 @@ -/* $NetBSD: rf_dagffwr.c,v 1.5 2000/01/07 03:40:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. 
Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagff.c - * - * code for creating fault-free DAGs - * - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagffwr.h> - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. 
- * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. - * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - - -void -rf_CreateNonRedundantWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) -{ - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} - -void -rf_CreateRAID0WriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) -{ - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} - -void -rf_CreateSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - /* "normal" rollaway */ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorFuncs, NULL); -} - -void -rf_CreateLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - /* "normal" rollaway */ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - 1, rf_RegularXorFunc, RF_TRUE); -} - - 
-/****************************************************************************** - * - * DAG creation code begins here - */ - - -/****************************************************************************** - * - * creates a DAG to perform a large-write operation: - * - * / Rod \ / Wnd \ - * H -- block- Rod - Xor - Cmt - Wnd --- T - * \ Rod / \ Wnp / - * \[Wnq]/ - * - * The XOR node also does the Q calculation in the P+Q architecture. - * All nodes are before the commit node (Cmt) are assumed to be atomic and - * undoable - or - they make no changes to permanent state. - * - * Rod = read old data - * Cmt = commit node - * Wnp = write new parity - * Wnd = write new data - * Wnq = write new "q" - * [] denotes optional segments in the graph - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * nfaults - number of faults array can tolerate - * (equal to # redundancy units in stripe) - * redfuncs - list of redundancy generating functions - * - *****************************************************************************/ - -void -rf_CommonCreateLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *), - int allowBufferRecycle) -{ - RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; - RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_PhysDiskAddr_t *pda; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 
asmap->raidAddress, - &which_ru); - - if (rf_dagDebug) { - printf("[Creating large-write DAG]\n"); - } - dag_h->creator = "LargeWriteDAG"; - - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; - i += nWndNodes; - xorNode = &nodes[i]; - i += 1; - wnpNode = &nodes[i]; - i += 1; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - if (nfaults == 2) { - wnqNode = &nodes[i]; - i += 1; - } else { - wnqNode = NULL; - } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, - &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) { - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - } else { - rodNodes = NULL; - } - - /* begin node initialization */ - if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); - } else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - } - - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, - nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, - 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rod", allocList); - 
rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - nodeNum++; - pda = pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - if (nRodNodes > 0) { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRodNodes, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, - "Xr ", allocList); - } else { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - 1, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); - } - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ - } - /* xor node needs to get at RAID information */ - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; - - /* - * Look for an Rod node that reads a complete SU. If none, alloc a buffer - * to receive the parity info. Note that we can't use a new data buffer - * because it will not have gotten written when the xor occurs. 
- */ - if (allowBufferRecycle) { - for (i = 0; i < nRodNodes; i++) { - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - } - } - if ((!allowBufferRecycle) || (i == nRodNodes)) { - RF_CallocAndAdd(xorNode->results[0], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), - (void *), allocList); - } else { - xorNode->results[0] = rodNodes[i].params[1].p; - } - - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = asmap->parityInfo; - wnpNode->params[1].p = xorNode->results[0]; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - if (nfaults == 2) { - /* - * We never try to recycle a buffer for the Q calcuation - * in addition to the parity. This would cause two buffers - * to get smashed during the P and Q calculation, guaranteeing - * one would be wrong. - */ - RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), - (void *), allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = asmap->qInfo; - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - } - /* - * Connect nodes to form graph. 
- */ - - /* connect dag header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes); - RF_ASSERT(xorNode->numAntecedents == nRodNodes); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - - /* connect the Rod nodes to the Xor node */ - RF_ASSERT(rodNodes[i].numSuccedents == 1); - rodNodes[i].succedents[0] = xorNode; - xorNode->antecedents[i] = &rodNodes[i]; - xorNode->antType[i] = rf_trueData; - } - } else { - /* connect the block node to the Xor node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(xorNode->numAntecedents == 1); - blockNode->succedents[0] = xorNode; - xorNode->antecedents[0] = blockNode; - xorNode->antType[0] = rf_control; - } - - /* connect the xor node to the commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - commitNode->antType[0] = rf_control; - - /* connect the commit node to the write nodes */ - RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - commitNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = commitNode; - wndNodes[i].antType[0] = rf_control; - } - RF_ASSERT(wnpNode->numAntecedents == 1); - commitNode->succedents[nWndNodes] = wnpNode; - wnpNode->antecedents[0] = commitNode; - wnpNode->antType[0] = rf_trueData; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - commitNode->succedents[nWndNodes + 1] = wnqNode; - wnqNode->antecedents[0] = commitNode; - wnqNode->antType[0] = rf_trueData; - } - /* connect the write nodes to the term node */ - 
RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &wndNodes[i]; - termNode->antType[i] = rf_control; - } - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes] = wnpNode; - termNode->antType[nWndNodes] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes + 1] = wnqNode; - termNode->antType[nWndNodes + 1] = rf_control; - } -} -/****************************************************************************** - * - * creates a DAG to perform a small-write operation (either raid 5 or pq), - * which is as follows: - * - * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm - * \- Rod X / \----> Wnd [Und]-/ - * [\- Rod X / \---> Wnd [Und]-/] - * [\- Roq -> Q / \--> Wnq [Unq]-/] - * - * Rop = read old parity - * Rod = read old data - * Roq = read old "q" - * Cmt = commit node - * Und = unlock data disk - * Unp = unlock parity disk - * Unq = unlock q disk - * Wnp = write new parity - * Wnd = write new data - * Wnq = write new "q" - * [ ] denotes optional segments in the graph - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. 
disk locking) - * allocList - list of memory allocated in DAG creation - * pfuncs - list of parity generating functions - * qfuncs - list of q generating functions - * - * A null qfuncs indicates single fault tolerant - *****************************************************************************/ - -void -rf_CommonCreateSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; - RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; - RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode, *nodes; - RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; - RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int (*qfunc) (RF_DagNode_t *); - int numDataNodes, numParityNodes; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; - - nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - pda = asmap->physInfo; - numDataNodes = asmap->numStripeUnitsAccessed; - numParityNodes = (asmap->parityInfo->next) ? 2 : 1; - - if (rf_dagDebug) { - printf("[Creating small-write DAG]\n"); - } - RF_ASSERT(numDataNodes > 0); - dag_h->creator = "SmallWriteDAG"; - - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * DAG creation occurs in four steps: - * 1. count the number of nodes in the DAG - * 2. create the nodes - * 3. initialize the nodes - * 4. connect the nodes - */ - - /* - * Step 1. 
compute number of nodes in the graph - */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node (nfaults * nparity) a read - * and write for each parity unit a block and commit node (2) a - * terminate node if atomic RMW an unlock node for each data unit, - * redundancy unit */ - totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) - + (nfaults * 2 * numParityNodes) + 3; - if (lu_flag) { - totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); - } - /* - * Step 2. create the nodes - */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - readDataNodes = &nodes[i]; - i += numDataNodes; - readParityNodes = &nodes[i]; - i += numParityNodes; - writeDataNodes = &nodes[i]; - i += numDataNodes; - writeParityNodes = &nodes[i]; - i += numParityNodes; - xorNodes = &nodes[i]; - i += numParityNodes; - termNode = &nodes[i]; - i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; - i += numDataNodes; - unlockParityNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockDataNodes = unlockParityNodes = NULL; - } - if (nfaults == 2) { - readQNodes = &nodes[i]; - i += numParityNodes; - writeQNodes = &nodes[i]; - i += numParityNodes; - qNodes = &nodes[i]; - i += numParityNodes; - if (lu_flag) { - unlockQNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockQNodes = NULL; - } - } else { - readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; - } - RF_ASSERT(i == totalNumNodes); - - /* - * Step 3. 
initialize the nodes - */ - /* initialize block node (Nil) */ - nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize commit node (Cmt) */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList); - - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, (nfaults * numParityNodes), 1, 4, 0, dag_h, - "Rod", allocList); - RF_ASSERT(pda != NULL); - /* physical disk addr desc */ - readDataNodes[i].params[0].p = pda; - /* buffer to hold old data */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, - dag_h, pda, allocList); - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readDataNodes[i].numSuccedents; j++) { - readDataNodes[i].propList[j] = NULL; - } - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; - i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, - 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - /* buffer to hold old parity */ - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, - dag_h, pda, allocList); - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, 
which_ru); - pda = pda->next; - for (j = 0; j < readParityNodes[i].numSuccedents; j++) { - readParityNodes[i].propList[0] = NULL; - } - } - - /* initialize nodes which read old Q (Roq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); - readQNodes[i].params[0].p = pda; - /* buffer to hold old Q */ - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, - allocList); - readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readQNodes[i].numSuccedents; j++) { - readQNodes[i].propList[0] = NULL; - } - } - } - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnd", allocList); - /* physical disk addr desc */ - writeDataNodes[i].params[0].p = pda; - /* buffer holding new data to be written */ - writeDataNodes[i].params[1].p = pda->bufPtr; - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Und", allocList); - /* physical disk addr desc */ - unlockDataNodes[i].params[0].p = pda; - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* - * Initialize nodes which compute new parity and Q. 
- */ - /* - * We use the simple XOR func in the double-XOR case, and when - * we're accessing only a portion of one stripe unit. The distinction - * between the two is that the regular XOR func assumes that the targbuf - * is a full SU in size, and examines the pda associated with the buffer - * to decide where within the buffer to XOR the data, whereas - * the simple XOR func just XORs the data into the start of the buffer. - */ - if ((numParityNodes == 2) || ((numDataNodes == 1) - && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } else { - qfunc = NULL; - qname = NULL; - } - } else { - func = pfuncs->regular; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->RegularName; - if (qfuncs) { - qfunc = qfuncs->regular; - qname = qfuncs->RegularName; - } else { - qfunc = NULL; - qname = NULL; - } - } - /* - * Initialize the xor nodes: params are {pda,buf} - * from {Rod,Wnd,Rop} nodes, and raidPtr - */ - if (numParityNodes == 2) { - /* double-xor case */ - for (i = 0; i < numParityNodes; i++) { - /* note: no wakeup func for xor */ - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, - 1, (numDataNodes + numParityNodes), 7, 1, dag_h, name, allocList); - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - /* use old parity buf as target buf */ - xorNodes[i].results[0] = readParityNodes[i].params[1].p; - if (nfaults == 2) { - /* note: no wakeup func for qor */ - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, 
undoFunc, NULL, 1, - (numDataNodes + numParityNodes), 7, 1, dag_h, qname, allocList); - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; - qNodes[i].params[2] = readQNodes[i].params[0]; - qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; - qNodes[i].params[6].p = raidPtr; - /* use old Q buf as target buf */ - qNodes[i].results[0] = readQNodes[i].params[1].p; - } - } - } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ - } - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ - writeDataNodes[i].params[0]; - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ - writeDataNodes[i].params[1]; - } - /* xor node needs to get at RAID information */ - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - if (nfaults == 2) { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, - qname, allocList); - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ - } - /* and read old q */ - 
qNodes[0].params[2 * numDataNodes + 0] = /* pda */ - readQNodes[0].params[0]; - qNodes[0].params[2 * numDataNodes + 1] = /* buffer ptr */ - readQNodes[0].params[1]; - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd nodes */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ - writeDataNodes[i].params[0]; - qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ - writeDataNodes[i].params[1]; - } - /* xor node needs to get at RAID information */ - qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; - qNodes[0].results[0] = readQNodes[0].params[1].p; - } - } - - /* initialize nodes which write new parity (Wnp) */ - pda = asmap->parityInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnp", allocList); - RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* initialize nodes which write new Q (Wnq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnq", 
allocList); - RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeQNodes[i].params[2].v = parityStripeID; - writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - } - /* - * Step 4. connect the nodes. - */ - - /* connect header to block node */ - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0] = blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old Q nodes */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; - RF_ASSERT(readQNodes[i].numAntecedents == 1); - readQNodes[i].antecedents[0] = blockNode; - readQNodes[i].antType[0] = rf_control; - } - } - /* connect read old data nodes to xor nodes */ - for (i = 0; i < 
numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == (nfaults * numParityNodes)); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - } - - /* connect read old data nodes to q nodes */ - if (nfaults == 2) { - for (i = 0; i < numDataNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numParityNodes + j] = &qNodes[j]; - qNodes[j].antecedents[i] = &readDataNodes[i]; - qNodes[j].antType[i] = rf_trueData; - } - } - } - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect read old q nodes to q nodes */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - } - /* connect xor nodes to commit node */ - RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes)); - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(xorNodes[i].numSuccedents == 1); - xorNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &xorNodes[i]; - commitNode->antType[i] = rf_control; - } - - /* connect q nodes to commit node */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(qNodes[i].numSuccedents == 1); - 
qNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i + numParityNodes] = &qNodes[i]; - commitNode->antType[i + numParityNodes] = rf_control; - } - } - /* connect commit node to write nodes */ - RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes))); - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(writeDataNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &writeDataNodes[i]; - writeDataNodes[i].antecedents[0] = commitNode; - writeDataNodes[i].antType[0] = rf_trueData; - } - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeParityNodes[i].numAntecedents == 1); - commitNode->succedents[i + numDataNodes] = &writeParityNodes[i]; - writeParityNodes[i].antecedents[0] = commitNode; - writeParityNodes[i].antType[0] = rf_trueData; - } - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeQNodes[i].numAntecedents == 1); - commitNode->succedents[i + numDataNodes + numParityNodes] = &writeQNodes[i]; - writeQNodes[i].antecedents[0] = commitNode; - writeQNodes[i].antType[0] = rf_trueData; - } - } - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - unlockDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &unlockDataNodes[i]; - termNode->antType[i] = rf_control; - } else { - /* connect write new data nodes to term node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * 
numParityNodes))); - writeDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &writeDataNodes[i]; - termNode->antType[i] = rf_control; - } - } - - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; - unlockParityNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); - unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } else { - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - } - - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - RF_ASSERT(unlockQNodes[i].numAntecedents == 1); - writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; - unlockQNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockQNodes[i].numSuccedents == 1); - unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } else { - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - } - } -} - - 
-/****************************************************************************** - * create a write graph (fault-free or degraded) for RAID level 1 - * - * Hdr -> Commit -> Wpd -> Nil -> Trm - * -> Wsd -> - * - * The "Wpd" node writes data to the primary copy in the mirror pair - * The "Wsd" node writes data to the secondary copy in the mirror pair - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidOneWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *unblockNode, *termNode, *commitNode; - RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda, *pdaP; - RF_StripeNum_t parityStripeID; - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 write DAG]\n"); - } - dag_h->creator = "RaidOneWriteDAG"; - - /* 2 implies access not SU aligned */ - nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; - nWndNodes = (asmap->physInfo->next) ? 
2 : 1; - - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 1) - nWndNodes--; - if (asmap->numParityFailed == 1) - nWmirNodes--; - - /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock - * + terminator) */ - RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - wndNode = &nodes[i]; - i += nWndNodes; - wmirNode = &nodes[i]; - i += nWmirNodes; - commitNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - - /* this dag can commit immediately */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the commit, unblock, and term nodes */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ - if (nWndNodes > 0) { - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); - RF_ASSERT(pda != NULL); - wndNode[i].params[0].p = pda; - wndNode[i].params[1].p = pda->bufPtr; - wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - RF_ASSERT(pda == NULL); - } - /* initialize the mirror nodes */ - if (nWmirNodes > 0) { - pda = asmap->physInfo; - pdaP = asmap->parityInfo; - for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 
- rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); - RF_ASSERT(pda != NULL); - wmirNode[i].params[0].p = pdaP; - wmirNode[i].params[1].p = pda->bufPtr; - wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - pdaP = pdaP->next; - } - RF_ASSERT(pda == NULL); - RF_ASSERT(pdaP == NULL); - } - /* link the header node to the commit node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 0); - dag_h->succedents[0] = commitNode; - - /* link the commit node to the write nodes */ - RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numAntecedents == 1); - commitNode->succedents[i] = &wndNode[i]; - wndNode[i].antecedents[0] = commitNode; - wndNode[i].antType[0] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numAntecedents == 1); - commitNode->succedents[i + nWndNodes] = &wmirNode[i]; - wmirNode[i].antecedents[0] = commitNode; - wmirNode[i].antType[0] = rf_control; - } - - /* link the write nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numSuccedents == 1); - wndNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNode[i]; - unblockNode->antType[i] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numSuccedents == 1); - wmirNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; - unblockNode->antType[i + nWndNodes] = rf_control; - } - - /* link the unblock node to the term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} - - - 
-/* DAGs which have no commit points. - * - * The following DAGs are used in forward and backward error recovery experiments. - * They are identical to the DAGs above this comment with the exception that the - * the commit points have been removed. - */ - - - -void -rf_CommonCreateLargeWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *), - int allowBufferRecycle) -{ - RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; - RF_DagNode_t *wnqNode, *blockNode, *syncNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_PhysDiskAddr_t *pda; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating large-write DAG]\n"); - dag_h->creator = "LargeWriteDAGFwd"; - - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; - i += nWndNodes; - xorNode = &nodes[i]; - i += 1; - wnpNode = &nodes[i]; - i += 1; - blockNode = &nodes[i]; - i += 1; - syncNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - if (nfaults == 2) { - wnqNode = &nodes[i]; - i += 1; - } else { - wnqNode = NULL; - } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) { - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - } else { 
- rodNodes = NULL; - } - - /* begin node initialization */ - if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes, 0, 0, dag_h, "Nil", allocList); - } else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, 1, 0, 0, dag_h, "Nil", allocList); - } - - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - nodeNum++; - pda = pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_FALSE, 
redFunc, rf_NullNodeUndoFunc, NULL, 1, nfaults, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ - } - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get - * at RAID information */ - - /* look for an Rod node that reads a complete SU. If none, alloc a - * buffer to receive the parity info. Note that we can't use a new - * data buffer because it will not have gotten written when the xor - * occurs. */ - if (allowBufferRecycle) { - for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - } - if ((!allowBufferRecycle) || (i == nRodNodes)) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - } else - xorNode->results[0] = rodNodes[i].params[1].p; - - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = asmap->parityInfo; - wnpNode->params[1].p = xorNode->results[0]; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ - - if (nfaults == 2) { - /* we never try to recycle a buffer for the Q calcuation in - * addition to the parity. 
This would cause two buffers to get - * smashed during the P and Q calculation, guaranteeing one - * would be wrong. */ - RF_CallocAndAdd(xorNode->results[1], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = asmap->qInfo; - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ - } - /* connect nodes to form graph */ - - /* connect dag header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes); - RF_ASSERT(syncNode->numAntecedents == nRodNodes); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - - /* connect the Rod nodes to the Nil node */ - RF_ASSERT(rodNodes[i].numSuccedents == 1); - rodNodes[i].succedents[0] = syncNode; - syncNode->antecedents[i] = &rodNodes[i]; - syncNode->antType[i] = rf_trueData; - } - } else { - /* connect the block node to the Nil node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(syncNode->numAntecedents == 1); - blockNode->succedents[0] = syncNode; - syncNode->antecedents[0] = blockNode; - syncNode->antType[0] = rf_control; - } - - /* connect the sync node to the Wnd nodes */ - RF_ASSERT(syncNode->numSuccedents == (1 + nWndNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - syncNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = 
syncNode; - wndNodes[i].antType[0] = rf_control; - } - - /* connect the sync node to the Xor node */ - RF_ASSERT(xorNode->numAntecedents == 1); - syncNode->succedents[nWndNodes] = xorNode; - xorNode->antecedents[0] = syncNode; - xorNode->antType[0] = rf_control; - - /* connect the xor node to the write parity node */ - RF_ASSERT(xorNode->numSuccedents == nfaults); - RF_ASSERT(wnpNode->numAntecedents == 1); - xorNode->succedents[0] = wnpNode; - wnpNode->antecedents[0] = xorNode; - wnpNode->antType[0] = rf_trueData; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - xorNode->succedents[1] = wnqNode; - wnqNode->antecedents[0] = xorNode; - wnqNode->antType[0] = rf_trueData; - } - /* connect the write nodes to the term node */ - RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &wndNodes[i]; - termNode->antType[i] = rf_control; - } - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes] = wnpNode; - termNode->antType[nWndNodes] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes + 1] = wnqNode; - termNode->antType[nWndNodes + 1] = rf_control; - } -} - - -/****************************************************************************** - * - * creates a DAG to perform a small-write operation (either raid 5 or pq), - * which is as follows: - * - * Hdr -> Nil -> Rop - Xor - Wnp [Unp] -- Trm - * \- Rod X- Wnd [Und] -------/ - * [\- Rod X- Wnd [Und] ------/] - * [\- Roq - Q --> Wnq [Unq]-/] - * - * Rop = read old parity - * Rod = read old data - * Roq = read old "q" - * Cmt = commit node - * Und = unlock data disk - * Unp = unlock parity disk - * Unq = unlock q disk - * Wnp = write new parity - * Wnd = write new 
data - * Wnq = write new "q" - * [ ] denotes optional segments in the graph - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * pfuncs - list of parity generating functions - * qfuncs - list of q generating functions - * - * A null qfuncs indicates single fault tolerant - *****************************************************************************/ - -void -rf_CommonCreateSmallWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; - RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; - RF_DagNode_t *xorNodes, *qNodes, *blockNode, *nodes; - RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; - RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int (*qfunc) (RF_DagNode_t *); - int numDataNodes, numParityNodes; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; - - nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - pda = asmap->physInfo; - numDataNodes = asmap->numStripeUnitsAccessed; - numParityNodes = (asmap->parityInfo->next) ? 
2 : 1; - - if (rf_dagDebug) - printf("[Creating small-write DAG]\n"); - RF_ASSERT(numDataNodes > 0); - dag_h->creator = "SmallWriteDAGFwd"; - - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - qfunc = NULL; - qname = NULL; - - /* DAG creation occurs in four steps: 1. count the number of nodes in - * the DAG 2. create the nodes 3. initialize the nodes 4. connect the - * nodes */ - - /* Step 1. compute number of nodes in the graph */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node (nfaults * nparity) a read - * and write for each parity unit a block node a terminate node if - * atomic RMW an unlock node for each data unit, redundancy unit */ - totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + (nfaults * 2 * numParityNodes) + 2; - if (lu_flag) - totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); - - - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - readDataNodes = &nodes[i]; - i += numDataNodes; - readParityNodes = &nodes[i]; - i += numParityNodes; - writeDataNodes = &nodes[i]; - i += numDataNodes; - writeParityNodes = &nodes[i]; - i += numParityNodes; - xorNodes = &nodes[i]; - i += numParityNodes; - termNode = &nodes[i]; - i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; - i += numDataNodes; - unlockParityNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockDataNodes = unlockParityNodes = NULL; - } - if (nfaults == 2) { - readQNodes = &nodes[i]; - i += numParityNodes; - writeQNodes = &nodes[i]; - i += numParityNodes; - qNodes = &nodes[i]; - i += numParityNodes; - if (lu_flag) { - unlockQNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockQNodes = NULL; - } - } else { - readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; - } - RF_ASSERT(i == totalNumNodes); - - /* Step 3. 
initialize the nodes */ - /* initialize block node (Nil) */ - nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, (numParityNodes * nfaults) + 1, 1, 4, 0, dag_h, "Rod", allocList); - RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * data */ - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readDataNodes[i].numSuccedents; j++) - readDataNodes[i].propList[j] = NULL; - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; - i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * parity */ - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - for (j = 0; j < readParityNodes[i].numSuccedents; j++) - readParityNodes[i].propList[0] = NULL; - pda = pda->next; - } - - /* initialize nodes which read old Q (Roq) */ - if (nfaults == 2) { - pda = 
asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); - readQNodes[i].params[0].p = pda; - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old Q */ - readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - for (j = 0; j < readQNodes[i].numSuccedents; j++) - readQNodes[i].propList[0] = NULL; - pda = pda->next; - } - } - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new - * data to be written */ - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - - - /* initialize nodes which compute new parity and Q */ - /* we use the simple XOR func in the double-XOR case, and when we're - * accessing only a portion of one stripe unit. 
the distinction - * between the two is that the regular XOR func assumes that the - * targbuf is a full SU in size, and examines the pda associated with - * the buffer to decide where within the buffer to XOR the data, - * whereas the simple XOR func just XORs the data into the start of - * the buffer. */ - if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } - } else { - func = pfuncs->regular; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->RegularName; - if (qfuncs) { - qfunc = qfuncs->regular; - qname = qfuncs->RegularName; - } - } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} - * nodes, and raidPtr */ - if (numParityNodes == 2) { /* double-xor case */ - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for - * xor */ - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as - * target buf */ - if (nfaults == 2) { - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, qname, allocList); /* no wakeup func for - * xor */ - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; - qNodes[i].params[2] = 
readQNodes[i].params[0]; - qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; - qNodes[i].params[6].p = raidPtr; - qNodes[i].results[0] = readQNodes[i].params[1].p; /* use old Q buf as - * target buf */ - } - } - } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - if (nfaults == 2) { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, qname, allocList); - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - /* and read old q */ - qNodes[0].params[2 * numDataNodes + 0] = readQNodes[0].params[0]; /* pda */ - qNodes[0].params[2 * numDataNodes + 1] = readQNodes[0].params[1]; /* buffer pointer */ - for (i = 0; i < 
numDataNodes; i++) { - /* set up params related to Wnd nodes */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ - qNodes[0].results[0] = readQNodes[0].params[1].p; - } - } - - /* initialize nodes which write new parity (Wnp) */ - pda = asmap->parityInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnp", allocList); - RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* initialize nodes which write new Q (Wnq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnq", allocList); - RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeQNodes[i].params[1].p = 
qNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeQNodes[i].params[2].v = parityStripeID; - writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - } - /* Step 4. connect the nodes */ - - /* connect header to block node */ - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0] = blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old Q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; - RF_ASSERT(readQNodes[i].numAntecedents == 1); - readQNodes[i].antecedents[0] = blockNode; - readQNodes[i].antType[0] = rf_control; - } - - /* connect read old data nodes to write new data nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == ((nfaults * numParityNodes) + 1)); - RF_ASSERT(writeDataNodes[i].numAntecedents == 1); - 
readDataNodes[i].succedents[0] = &writeDataNodes[i]; - writeDataNodes[i].antecedents[0] = &readDataNodes[i]; - writeDataNodes[i].antType[0] = rf_antiData; - } - - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[1 + j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - } - - /* connect read old data nodes to q nodes */ - if (nfaults == 2) - for (i = 0; i < numDataNodes; i++) - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[1 + numParityNodes + j] = &qNodes[j]; - qNodes[j].antecedents[i] = &readDataNodes[i]; - qNodes[j].antType[i] = rf_trueData; - } - - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect read old q nodes to q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readQNodes[i].numSuccedents == numParityNodes); - readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect xor nodes to the write new parity nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeParityNodes[i].numAntecedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numSuccedents == numParityNodes); - xorNodes[i].succedents[j] = &writeParityNodes[j]; - writeParityNodes[j].antecedents[i] 
= &xorNodes[i]; - writeParityNodes[j].antType[i] = rf_trueData; - } - } - - /* connect q nodes to the write new q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeQNodes[i].numAntecedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numSuccedents == 1); - qNodes[i].succedents[j] = &writeQNodes[j]; - writeQNodes[j].antecedents[i] = &qNodes[i]; - writeQNodes[j].antType[i] = rf_trueData; - } - } - - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - unlockDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &unlockDataNodes[i]; - termNode->antType[i] = rf_control; - } else { - /* connect write new data nodes to term node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &writeDataNodes[i]; - termNode->antType[i] = rf_control; - } - } - - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; - unlockParityNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - 
RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); - unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } else { - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - } - - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - RF_ASSERT(unlockQNodes[i].numAntecedents == 1); - writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; - unlockQNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockQNodes[i].numSuccedents == 1); - unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } else { - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - } -} - - - -/****************************************************************************** - * create a write graph (fault-free or degraded) for RAID level 1 - * - * Hdr Nil -> Wpd -> Nil -> Trm - * Nil -> Wsd -> - * - * The "Wpd" node writes data to the primary copy in the mirror pair - * The "Wsd" node writes data to the secondary copy in the mirror pair - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. 
disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidOneWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *blockNode, *unblockNode, *termNode; - RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda, *pdaP; - RF_StripeNum_t parityStripeID; - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 write DAG]\n"); - } - nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; /* 2 implies access not - * SU aligned */ - nWndNodes = (asmap->physInfo->next) ? 2 : 1; - - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 1) - nWndNodes--; - if (asmap->numParityFailed == 1) - nWmirNodes--; - - /* total number of nodes = nWndNodes + nWmirNodes + (block + unblock + - * terminator) */ - RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNode = &nodes[i]; - i += nWndNodes; - wmirNode = &nodes[i]; - i += nWmirNodes; - blockNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - - /* this dag can commit immediately */ - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the unblock and term nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, 
RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ - if (nWndNodes > 0) { - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); - RF_ASSERT(pda != NULL); - wndNode[i].params[0].p = pda; - wndNode[i].params[1].p = pda->bufPtr; - wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - RF_ASSERT(pda == NULL); - } - /* initialize the mirror nodes */ - if (nWmirNodes > 0) { - pda = asmap->physInfo; - pdaP = asmap->parityInfo; - for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); - RF_ASSERT(pda != NULL); - wmirNode[i].params[0].p = pdaP; - wmirNode[i].params[1].p = pda->bufPtr; - wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - pdaP = pdaP->next; - } - RF_ASSERT(pda == NULL); - RF_ASSERT(pdaP == NULL); - } - /* link the header node to the block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* link the block node to the write nodes */ - RF_ASSERT(blockNode->numSuccedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numAntecedents == 1); - blockNode->succedents[i] = &wndNode[i]; - wndNode[i].antecedents[0] = blockNode; - wndNode[i].antType[0] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numAntecedents == 1); - blockNode->succedents[i + nWndNodes] = &wmirNode[i]; - wmirNode[i].antecedents[0] = blockNode; - wmirNode[i].antType[0] = rf_control; - } - - /* link the write 
nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numSuccedents == 1); - wndNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNode[i]; - unblockNode->antType[i] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numSuccedents == 1); - wmirNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; - unblockNode->antType[i + nWndNodes] = rf_control; - } - - /* link the unblock node to the term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - - return; -} diff --git a/sys/dev/raidframe/rf_dagffwr.h b/sys/dev/raidframe/rf_dagffwr.h deleted file mode 100644 index f65875e..0000000 --- a/sys/dev/raidframe/rf_dagffwr.h +++ /dev/null @@ -1,77 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagffwr.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DAGFFWR_H_ -#define _RF__RF_DAGFFWR_H_ - -#include <dev/raidframe/rf_types.h> - -/* fault-free write DAG creation routines */ -void -rf_CreateNonRedundantWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_IoType_t type); -void -rf_CreateRAID0WriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, RF_IoType_t type); -void -rf_CreateSmallWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CreateLargeWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CommonCreateLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, - int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, - int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - void rf_CommonCreateSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * 
pfuncs, RF_RedFuncs_t * qfuncs); - void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - void rf_CreateRaidOneWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); - void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); - -#endif /* !_RF__RF_DAGFFWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagflags.h b/sys/dev/raidframe/rf_dagflags.h deleted file mode 100644 index b0777bd..0000000 --- a/sys/dev/raidframe/rf_dagflags.h +++ /dev/null @@ -1,68 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagflags.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/************************************************************************************** - * - * dagflags.h -- flags that can be given to DoAccess - * I pulled these out of dag.h because routines that call DoAccess may need these flags, - * but certainly do not need the declarations related to the DAG data structures. - * - **************************************************************************************/ - - -#ifndef _RF__RF_DAGFLAGS_H_ -#define _RF__RF_DAGFLAGS_H_ - -/* - * Bitmasks for the "flags" parameter (RF_RaidAccessFlags_t) used - * by DoAccess, SelectAlgorithm, and the DAG creation routines. - * - * If USE_DAG or USE_ASM is specified, neither the DAG nor the ASM - * will be modified, which means that you can't SUPRESS if you - * specify USE_DAG. - */ - -#define RF_DAG_FLAGS_NONE 0 /* no flags */ -#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* supress all stripe locks in - * the DAG */ -#define RF_DAG_RETURN_ASM (1<<1) /* create an ASM and return it - * instead of freeing it */ -#define RF_DAG_RETURN_DAG (1<<2) /* create a DAG and return it - * instead of freeing it */ -#define RF_DAG_NONBLOCKING_IO (1<<3) /* cause DoAccess to be - * non-blocking */ -#define RF_DAG_ACCESS_COMPLETE (1<<4) /* the access is complete */ -#define RF_DAG_DISPATCH_RETURNED (1<<5) /* used to handle the case - * where the dag invokes no - * I/O */ -#define RF_DAG_TEST_ACCESS (1<<6) /* this access came through - * rf_ioctl instead of - * rf_strategy */ - -#endif /* !_RF__RF_DAGFLAGS_H_ */ diff --git a/sys/dev/raidframe/rf_dagfuncs.c b/sys/dev/raidframe/rf_dagfuncs.c deleted file mode 100644 index a1ba8150..0000000 --- a/sys/dev/raidframe/rf_dagfuncs.c +++ /dev/null @@ -1,906 +0,0 @@ -/* $NetBSD: rf_dagfuncs.c,v 1.7 2001/02/03 12:51:10 mrg Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. 
Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * dagfuncs.c -- DAG node execution routines - * - * Rules: - * 1. Every DAG execution function must eventually cause node->status to - * get set to "good" or "bad", and "FinishNode" to be called. In the - * case of nodes that complete immediately (xor, NullNodeFunc, etc), - * the node execution function can do these two things directly. In - * the case of nodes that have to wait for some event (a disk read to - * complete, a lock to be released, etc) to occur before they can - * complete, this is typically achieved by having whatever module - * is doing the operation call GenericWakeupFunc upon completion. - * 2. DAG execution functions should check the status in the DAG header - * and NOP out their operations if the status is not "enable". However, - * execution functions that release resources must be sure to release - * them even when they NOP out the function that would use them. 
- * Functions that acquire resources should go ahead and acquire them - * even when they NOP, so that a downstream release node will not have - * to check to find out whether or not the acquire was suppressed. - */ - -#include <sys/param.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_dagutils.h> - -#include <dev/raidframe/rf_kintf.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 -#include <dev/raidframe/rf_paritylog.h> -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int (*rf_DiskReadFunc) (RF_DagNode_t *); -int (*rf_DiskWriteFunc) (RF_DagNode_t *); -int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); -int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); -int (*rf_DiskUnlockFunc) (RF_DagNode_t *); -int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); -int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); -int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); -int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); - -/***************************************************************************************** - * main (only) configuration routine for this module - ****************************************************************************************/ -int -rf_ConfigureDAGFuncs(listp) - RF_ShutdownList_t **listp; -{ - RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); - rf_DiskReadFunc = rf_DiskReadFuncForThreads; - rf_DiskReadUndoFunc = rf_DiskUndoFunc; - rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; - rf_DiskWriteUndoFunc = rf_DiskUndoFunc; - rf_DiskUnlockFunc = 
rf_DiskUnlockFuncForThreads; - rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc; - rf_RegularXorUndoFunc = rf_NullNodeUndoFunc; - rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc; - rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc; - return (0); -} - - - -/***************************************************************************************** - * the execution function associated with a terminate node - ****************************************************************************************/ -int -rf_TerminateFunc(node) - RF_DagNode_t *node; -{ - RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); - node->status = rf_good; - return (rf_FinishNode(node, RF_THREAD_CONTEXT)); -} - -int -rf_TerminateUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} - - -/***************************************************************************************** - * execution functions associated with a mirror node - * - * parameters: - * - * 0 - physical disk addres of data - * 1 - buffer for holding read data - * 2 - parity stripe ID - * 3 - flags - * 4 - physical disk address of mirror (parity) - * - ****************************************************************************************/ - -int -rf_DiskReadMirrorIdleFunc(node) - RF_DagNode_t *node; -{ - /* select the mirror copy with the shortest queue and fill in node - * parameters with physical disk address */ - - rf_SelectMirrorDiskIdle(node); - return (rf_DiskReadFunc(node)); -} - -int -rf_DiskReadMirrorPartitionFunc(node) - RF_DagNode_t *node; -{ - /* select the mirror copy with the shortest queue and fill in node - * parameters with physical disk address */ - - rf_SelectMirrorDiskPartition(node); - return (rf_DiskReadFunc(node)); -} - -int -rf_DiskReadMirrorUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} - - - -#if RF_INCLUDE_PARITYLOGGING > 0 -/***************************************************************************************** - * the execution function associated with a parity log update node - 
****************************************************************************************/ -int -rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; -{ - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_ParityLogData_t *logData; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - logData = rf_CreateParityLogData(RF_UPDATE, pda, buf, - (RF_Raid_t *) (node->dagHdr->raidPtr), - node->wakeFunc, (void *) node, - node->dagHdr->tracerec, timer); - if (logData) - rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); - else { - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->plog_us += RF_ETIMER_VAL_US(timer); - (node->wakeFunc) (node, ENOMEM); - } - } - return (0); -} - - -/***************************************************************************************** - * the execution function associated with a parity log overwrite node - ****************************************************************************************/ -int -rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; -{ - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_ParityLogData_t *logData; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr), - node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer); - if (logData) - rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); - else { - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->plog_us += RF_ETIMER_VAL_US(timer); - (node->wakeFunc) (node, ENOMEM); - } - } - return (0); -} -#else /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int -rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; -{ - return (0); -} -int 
-rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; -{ - return (0); -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int -rf_ParityLogUpdateUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} - -int -rf_ParityLogOverwriteUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} -/***************************************************************************************** - * the execution function associated with a NOP node - ****************************************************************************************/ -int -rf_NullNodeFunc(node) - RF_DagNode_t *node; -{ - node->status = rf_good; - return (rf_FinishNode(node, RF_THREAD_CONTEXT)); -} - -int -rf_NullNodeUndoFunc(node) - RF_DagNode_t *node; -{ - node->status = rf_undone; - return (rf_FinishNode(node, RF_THREAD_CONTEXT)); -} - - -/***************************************************************************************** - * the execution function associated with a disk-read node - ****************************************************************************************/ -int -rf_DiskReadFuncForThreads(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; - unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); - unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); - RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; - -#if defined(__NetBSD__) - if (node->dagHdr->bp) - b_proc = (void *) ((RF_Buf_t) node->dagHdr->bp)->b_proc; -#endif - - RF_ASSERT(!(lock && unlock)); - flags |= (lock) ? 
RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; - - req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, - buf, parityStripeID, which_ru, - (int (*) (void *, int)) node->wakeFunc, - node, NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), flags, b_proc); - if (!req) { - (node->wakeFunc) (node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); - } - return (0); -} - - -/***************************************************************************************** - * the execution function associated with a disk-write node - ****************************************************************************************/ -int -rf_DiskWriteFuncForThreads(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; - unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); - unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); - RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; - -#if defined(__NetBSD__) - if (node->dagHdr->bp) - b_proc = (void *) ((RF_Buf_t) node->dagHdr->bp)->b_proc; -#endif - - /* normal processing (rollaway or forward recovery) begins here */ - RF_ASSERT(!(lock && unlock)); - flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? 
RF_UNLOCK_DISK_QUEUE : 0; - req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, - buf, parityStripeID, which_ru, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, NULL, - node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - flags, b_proc); - - if (!req) { - (node->wakeFunc) (node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); - } - - return (0); -} -/***************************************************************************************** - * the undo function for disk nodes - * Note: this is not a proper undo of a write node, only locks are released. - * old data is not restored to disk! - ****************************************************************************************/ -int -rf_DiskUndoFunc(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - RF_UNLOCK_DISK_QUEUE, NULL); - if (!req) - (node->wakeFunc) (node, ENOMEM); - else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); - } - - return (0); -} -/***************************************************************************************** - * the execution function associated with an "unlock disk queue" node - ****************************************************************************************/ -int -rf_DiskUnlockFuncForThreads(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, 
- (int (*) (void *, int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - RF_UNLOCK_DISK_QUEUE, NULL); - if (!req) - (node->wakeFunc) (node, ENOMEM); - else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); - } - - return (0); -} -/***************************************************************************************** - * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes, - * the routine is called to set the node status and inform the execution engine that - * the node has fired. - ****************************************************************************************/ -int -rf_GenericWakeupFunc(node, status) - RF_DagNode_t *node; - int status; -{ - switch (node->status) { - case rf_bwd1: - node->status = rf_bwd2; - if (node->dagFuncData) - rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); - return (rf_DiskWriteFuncForThreads(node)); - break; - case rf_fired: - if (status) - node->status = rf_bad; - else - node->status = rf_good; - break; - case rf_recover: - /* probably should never reach this case */ - if (status) - node->status = rf_panic; - else - node->status = rf_undone; - break; - default: - printf("rf_GenericWakeupFunc:"); - printf("node->status is %d,", node->status); - printf("status is %d \n", status); - RF_PANIC(); - break; - } - if (node->dagFuncData) - rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); - return (rf_FinishNode(node, RF_INTR_CONTEXT)); -} - - -/***************************************************************************************** - * there are three distinct types of xor nodes - * A "regular xor" is used in the fault-free case where the access spans a complete - * stripe unit. 
It assumes that the result buffer is one full stripe unit in size, - * and uses the stripe-unit-offset values that it computes from the PDAs to determine - * where within the stripe unit to XOR each argument buffer. - * - * A "simple xor" is used in the fault-free case where the access touches only a portion - * of one (or two, in some cases) stripe unit(s). It assumes that all the argument - * buffers are of the same size and have the same stripe unit offset. - * - * A "recovery xor" is used in the degraded-mode case. It's similar to the regular - * xor function except that it takes the failed PDA as an additional parameter, and - * uses it to determine what portions of the argument buffers need to be xor'd into - * the result buffer, and where in the result buffer they should go. - ****************************************************************************************/ - -/* xor the params together and store the result in the result field. - * assume the result field points to a buffer that is the size of one SU, - * and use the pda params to determine where within the buffer to XOR - * the input buffers. 
- */ -int -rf_RegularXorFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - int i, retcode; - - retcode = 0; - if (node->dagHdr->status == rf_enable) { - /* don't do the XOR if the input is the same as the output */ - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 1; i += 2) - if (node->params[i + 1].p != node->results[0]) { - retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, - (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ -} -/* xor the inputs into the result buffer, ignoring placement issues */ -int -rf_SimpleXorFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - int i, retcode = 0; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - /* don't do the XOR if the input is the same as the output */ - for (i = 0; i < node->numParams - 1; i += 2) - if (node->params[i + 1].p != node->results[0]) { - retcode = rf_bxor((char *)node->params[i + 1].p, - (char *)node->results[0], - rf_RaidAddressToByte(raidPtr, - ((RF_PhysDiskAddr_t *)node->params[i].p)-> - numSector), (RF_Buf_t)node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ -} -/* this xor is used by the degraded-mode dag functions to recover lost data. - * the second-to-last parameter is the PDA for the failed portion of the access. 
- * the code here looks at this PDA and assumes that the xor target buffer is - * equal in size to the number of sectors in the failed PDA. It then uses - * the other PDAs in the parameter list to determine where within the target - * buffer the corresponding data should be xored. - */ -int -rf_RecoveryXorFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - int i, retcode = 0; - RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) - if (node->params[i + 1].p != node->results[0]) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i + 1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, retcode)); -} -/***************************************************************************************** - * The next three functions are utilities used by the above xor-execution functions. - ****************************************************************************************/ - - -/* - * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit - * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. 
When the - * access described by pda is one SU in size (which by implication means it's SU-aligned), - * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one - * SU in size the XOR occurs on only the portion of targbuf identified in the pda. - */ - -int -rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - char *srcbuf; - char *targbuf; - void *bp; -{ - char *targptr; - int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int SUOffset = pda->startSector % sectPerSU; - int length, retcode = 0; - - RF_ASSERT(pda->numSector <= sectPerSU); - - targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); - length = rf_RaidAddressToByte(raidPtr, pda->numSector); - retcode = rf_bxor(srcbuf, targptr, length, bp); - return (retcode); -} -/* it really should be the case that the buffer pointers (returned by malloc) - * are aligned to the natural word size of the machine, so this is the only - * case we optimize for. The length should always be a multiple of the sector - * size, so there should be no problem with leftover bytes at the end. - */ -int -rf_bxor(src, dest, len, bp) - char *src; - char *dest; - int len; - void *bp; -{ - unsigned mask = sizeof(long) - 1, retcode = 0; - - if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { - retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp); - } else { - RF_ASSERT(0); - } - return (retcode); -} -/* map a user buffer into kernel space, if necessary */ -#define REMAP_VA(_bp,x,y) (y) = (x) - -/* When XORing in kernel mode, we need to map each user page to kernel space before we can access it. 
- * We don't want to assume anything about which input buffers are in kernel/user - * space, nor about their alignment, so in each loop we compute the maximum number - * of bytes that we can xor without crossing any page boundaries, and do only this many - * bytes before the next remap. - */ -int -rf_longword_bxor(src, dest, len, bp) - unsigned long *src; - unsigned long *dest; - int len; /* longwords */ - void *bp; -{ - unsigned long *end = src + len; - unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ - unsigned long *pg_src, *pg_dest; /* per-page source/dest - * pointers */ - int longs_this_time;/* # longwords to xor in the current iteration */ - - REMAP_VA(bp, src, pg_src); - REMAP_VA(bp, dest, pg_dest); - if (!pg_src || !pg_dest) - return (EFAULT); - - while (len >= 4) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ - src += longs_this_time; - dest += longs_this_time; - len -= longs_this_time; - while (longs_this_time >= 4) { - d0 = pg_dest[0]; - d1 = pg_dest[1]; - d2 = pg_dest[2]; - d3 = pg_dest[3]; - s0 = pg_src[0]; - s1 = pg_src[1]; - s2 = pg_src[2]; - s3 = pg_src[3]; - pg_dest[0] = d0 ^ s0; - pg_dest[1] = d1 ^ s1; - pg_dest[2] = d2 ^ s2; - pg_dest[3] = d3 ^ s3; - pg_src += 4; - pg_dest += 4; - longs_this_time -= 4; - } - while (longs_this_time > 0) { /* cannot cross any page - * boundaries here */ - *pg_dest++ ^= *pg_src++; - longs_this_time--; - } - - /* either we're done, or we've reached a page boundary on one - * (or possibly both) of the pointers */ - if (len) { - if (RF_PAGE_ALIGNED(src)) - REMAP_VA(bp, src, pg_src); - if (RF_PAGE_ALIGNED(dest)) - REMAP_VA(bp, dest, pg_dest); - if (!pg_src || !pg_dest) - return (EFAULT); - } - } - while (src < end) { - *pg_dest++ ^= *pg_src++; - src++; - dest++; - len--; - if (RF_PAGE_ALIGNED(src)) - REMAP_VA(bp, src, pg_src); - if (RF_PAGE_ALIGNED(dest)) - REMAP_VA(bp, dest, pg_dest); - } - RF_ASSERT(len == 0); - return (0); -} - - -/* 
- dst = a ^ b ^ c; - a may equal dst - see comment above longword_bxor -*/ -int -rf_longword_bxor3(dst, a, b, c, len, bp) - unsigned long *dst; - unsigned long *a; - unsigned long *b; - unsigned long *c; - int len; /* length in longwords */ - void *bp; -{ - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest - * pointers */ - int longs_this_time;/* # longs to xor in the current iteration */ - char dst_is_a = 0; - - REMAP_VA(bp, a, pg_a); - REMAP_VA(bp, b, pg_b); - REMAP_VA(bp, c, pg_c); - if (a == dst) { - pg_dst = pg_a; - dst_is_a = 1; - } else { - REMAP_VA(bp, dst, pg_dst); - } - - /* align dest to cache line. Can't cross a pg boundary on dst here. */ - while ((((unsigned long) pg_dst) & 0x1f)) { - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - dst++; - a++; - b++; - c++; - if (RF_PAGE_ALIGNED(a)) { - REMAP_VA(bp, a, pg_a); - if (!pg_a) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(b)) { - REMAP_VA(bp, a, pg_b); - if (!pg_b) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(c)) { - REMAP_VA(bp, a, pg_c); - if (!pg_c) - return (EFAULT); - } - len--; - } - - while (len > 4) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); - a += longs_this_time; - b += longs_this_time; - c += longs_this_time; - dst += longs_this_time; - len -= longs_this_time; - while (longs_this_time >= 4) { - a0 = pg_a[0]; - longs_this_time -= 4; - - a1 = pg_a[1]; - a2 = pg_a[2]; - - a3 = pg_a[3]; - pg_a += 4; - - b0 = pg_b[0]; - b1 = pg_b[1]; - - b2 = pg_b[2]; - b3 = pg_b[3]; - /* start dual issue */ - a0 ^= b0; - b0 = pg_c[0]; - - pg_b += 4; - a1 ^= b1; - - a2 ^= b2; - a3 ^= b3; - - b1 = pg_c[1]; - a0 ^= b0; - - b2 = pg_c[2]; - a1 ^= b1; - - b3 = pg_c[3]; - a2 ^= b2; - - pg_dst[0] = a0; - a3 ^= b3; - pg_dst[1] = a1; - pg_c += 4; - pg_dst[2] = a2; - pg_dst[3] = a3; - pg_dst += 4; - } - while (longs_this_time > 0) { /* cannot cross any page - * boundaries here */ - 
*pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - longs_this_time--; - } - - if (len) { - if (RF_PAGE_ALIGNED(a)) { - REMAP_VA(bp, a, pg_a); - if (!pg_a) - return (EFAULT); - if (dst_is_a) - pg_dst = pg_a; - } - if (RF_PAGE_ALIGNED(b)) { - REMAP_VA(bp, b, pg_b); - if (!pg_b) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(c)) { - REMAP_VA(bp, c, pg_c); - if (!pg_c) - return (EFAULT); - } - if (!dst_is_a) - if (RF_PAGE_ALIGNED(dst)) { - REMAP_VA(bp, dst, pg_dst); - if (!pg_dst) - return (EFAULT); - } - } - } - while (len) { - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - dst++; - a++; - b++; - c++; - if (RF_PAGE_ALIGNED(a)) { - REMAP_VA(bp, a, pg_a); - if (!pg_a) - return (EFAULT); - if (dst_is_a) - pg_dst = pg_a; - } - if (RF_PAGE_ALIGNED(b)) { - REMAP_VA(bp, b, pg_b); - if (!pg_b) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(c)) { - REMAP_VA(bp, c, pg_c); - if (!pg_c) - return (EFAULT); - } - if (!dst_is_a) - if (RF_PAGE_ALIGNED(dst)) { - REMAP_VA(bp, dst, pg_dst); - if (!pg_dst) - return (EFAULT); - } - len--; - } - return (0); -} - -int -rf_bxor3(dst, a, b, c, len, bp) - unsigned char *dst; - unsigned char *a; - unsigned char *b; - unsigned char *c; - unsigned long len; - void *bp; -{ - RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); - - return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, - (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); -} diff --git a/sys/dev/raidframe/rf_dagfuncs.h b/sys/dev/raidframe/rf_dagfuncs.h deleted file mode 100644 index da7e8b2..0000000 --- a/sys/dev/raidframe/rf_dagfuncs.h +++ /dev/null @@ -1,90 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagfuncs.h,v 1.4 2000/03/30 13:39:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. 
Courtright II, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * dagfuncs.h -- header file for DAG node execution routines - * - ****************************************************************************************/ - -#ifndef _RF__RF_DAGFUNCS_H_ -#define _RF__RF_DAGFUNCS_H_ - -int rf_ConfigureDAGFuncs(RF_ShutdownList_t ** listp); -int rf_TerminateFunc(RF_DagNode_t * node); -int rf_TerminateUndoFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorIdleFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorUndoFunc(RF_DagNode_t * node); -int rf_ParityLogUpdateFunc(RF_DagNode_t * node); -int rf_ParityLogOverwriteFunc(RF_DagNode_t * node); -int rf_ParityLogUpdateUndoFunc(RF_DagNode_t * node); -int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t * node); -int rf_NullNodeFunc(RF_DagNode_t * node); -int rf_NullNodeUndoFunc(RF_DagNode_t * node); -int rf_DiskReadFuncForThreads(RF_DagNode_t * node); -int rf_DiskWriteFuncForThreads(RF_DagNode_t * node); -int 
rf_DiskUndoFunc(RF_DagNode_t * node); -int rf_DiskUnlockFuncForThreads(RF_DagNode_t * node); -int rf_GenericWakeupFunc(RF_DagNode_t * node, int status); -int rf_RegularXorFunc(RF_DagNode_t * node); -int rf_SimpleXorFunc(RF_DagNode_t * node); -int rf_RecoveryXorFunc(RF_DagNode_t * node); -int -rf_XorIntoBuffer(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, char *srcbuf, - char *targbuf, void *bp); -int rf_bxor(char *src, char *dest, int len, void *bp); -int -rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp); -int -rf_longword_bxor3(unsigned long *dest, unsigned long *a, unsigned long *b, - unsigned long *c, int len, void *bp); -int -rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, - unsigned char *c, unsigned long len, void *bp); - -/* function ptrs defined in ConfigureDAGFuncs() */ -extern int (*rf_DiskReadFunc) (RF_DagNode_t *); -extern int (*rf_DiskWriteFunc) (RF_DagNode_t *); -extern int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); -extern int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); -extern int (*rf_DiskUnlockFunc) (RF_DagNode_t *); -extern int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); -extern int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); -extern int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); -extern int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); - -/* macros for manipulating the param[3] in a read or write node */ -#define RF_CREATE_PARAM3(pri, lk, unlk, wru) (((RF_uint64)(((wru&0xFFFFFF)<<8)|((lk)?0x10:0)|((unlk)?0x20:0)|((pri)&0xF)) )) -#define RF_EXTRACT_PRIORITY(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 0) & 0x0F) -#define RF_EXTRACT_LOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 4) & 0x1) -#define RF_EXTRACT_UNLOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1) -#define RF_EXTRACT_RU(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF) - -#endif /* !_RF__RF_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_dagutils.c b/sys/dev/raidframe/rf_dagutils.c deleted file mode 100644 index 
c961870..0000000 --- a/sys/dev/raidframe/rf_dagutils.c +++ /dev/null @@ -1,1299 +0,0 @@ -/* $NetBSD: rf_dagutils.c,v 1.6 1999/12/09 02:26:09 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Mark Holland, William V. Courtright II, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/****************************************************************************** - * - * rf_dagutils.c -- utility routines for manipulating dags - * - *****************************************************************************/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_shutdown.h> - -#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_))) - -RF_RedFuncs_t rf_xorFuncs = { - rf_RegularXorFunc, "Reg Xr", -rf_SimpleXorFunc, "Simple Xr"}; - -RF_RedFuncs_t rf_xorRecoveryFuncs = { - rf_RecoveryXorFunc, "Recovery Xr", -rf_RecoveryXorFunc, "Recovery Xr"}; - -static void rf_RecurPrintDAG(RF_DagNode_t *, int, int); -static void rf_PrintDAG(RF_DagHeader_t *); -static int -rf_ValidateBranch(RF_DagNode_t *, int *, int *, - RF_DagNode_t **, int); -static void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int); -static void rf_ValidateVisitedBits(RF_DagHeader_t *); - -/****************************************************************************** - * - * InitNode - initialize a dag node - * - * the size of the propList array is always the same as that of the - * successors array. 
- * - *****************************************************************************/ -void -rf_InitNode( - RF_DagNode_t * node, - RF_NodeStatus_t initstatus, - int commit, - int (*doFunc) (RF_DagNode_t * node), - int (*undoFunc) (RF_DagNode_t * node), - int (*wakeFunc) (RF_DagNode_t * node, int status), - int nSucc, - int nAnte, - int nParam, - int nResult, - RF_DagHeader_t * hdr, - char *name, - RF_AllocListElem_t * alist) -{ - void **ptrs; - int nptrs; - - if (nAnte > RF_MAX_ANTECEDENTS) - RF_PANIC(); - node->status = initstatus; - node->commitNode = commit; - node->doFunc = doFunc; - node->undoFunc = undoFunc; - node->wakeFunc = wakeFunc; - node->numParams = nParam; - node->numResults = nResult; - node->numAntecedents = nAnte; - node->numAntDone = 0; - node->next = NULL; - node->numSuccedents = nSucc; - node->name = name; - node->dagHdr = hdr; - node->visited = 0; - - /* allocate all the pointers with one call to malloc */ - nptrs = nSucc + nAnte + nResult + nSucc; - - if (nptrs <= RF_DAG_PTRCACHESIZE) { - /* - * The dag_ptrs field of the node is basically some scribble - * space to be used here. We could get rid of it, and always - * allocate the range of pointers, but that's expensive. So, - * we pick a "common case" size for the pointer cache. Hopefully, - * we'll find that: - * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by - * only a little bit (least efficient case) - * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE - * (wasted memory) - */ - ptrs = (void **) node->dag_ptrs; - } else { - RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist); - } - node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL; - node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs + nSucc) : NULL; - node->results = (nResult) ? (void **) (ptrs + nSucc + nAnte) : NULL; - node->propList = (nSucc) ? 
(RF_PropHeader_t **) (ptrs + nSucc + nAnte + nResult) : NULL; - - if (nParam) { - if (nParam <= RF_DAG_PARAMCACHESIZE) { - node->params = (RF_DagParam_t *) node->dag_params; - } else { - RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist); - } - } else { - node->params = NULL; - } -} - - - -/****************************************************************************** - * - * allocation and deallocation routines - * - *****************************************************************************/ - -void -rf_FreeDAG(dag_h) - RF_DagHeader_t *dag_h; -{ - RF_AccessStripeMapHeader_t *asmap, *t_asmap; - RF_DagHeader_t *nextDag; - int i; - - while (dag_h) { - nextDag = dag_h->next; - for (i = 0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) { - /* release mem chunks */ - rf_ReleaseMemChunk(dag_h->memChunk[i]); - dag_h->memChunk[i] = NULL; - } - - RF_ASSERT(i == dag_h->chunkIndex); - if (dag_h->xtraChunkCnt > 0) { - /* free xtraMemChunks */ - for (i = 0; dag_h->xtraMemChunk[i] && i < dag_h->xtraChunkIndex; i++) { - rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]); - dag_h->xtraMemChunk[i] = NULL; - } - RF_ASSERT(i == dag_h->xtraChunkIndex); - /* free ptrs to xtraMemChunks */ - RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *)); - } - rf_FreeAllocList(dag_h->allocList); - for (asmap = dag_h->asmList; asmap;) { - t_asmap = asmap; - asmap = asmap->next; - rf_FreeAccessStripeMap(t_asmap); - } - rf_FreeDAGHeader(dag_h); - dag_h = nextDag; - } -} - -RF_PropHeader_t * -rf_MakePropListEntry( - RF_DagHeader_t * dag_h, - int resultNum, - int paramNum, - RF_PropHeader_t * next, - RF_AllocListElem_t * allocList) -{ - RF_PropHeader_t *p; - - RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), - (RF_PropHeader_t *), allocList); - p->resultNum = resultNum; - p->paramNum = paramNum; - p->next = next; - return (p); -} - -static RF_FreeList_t *rf_dagh_freelist; - -#define RF_MAX_FREE_DAGH 128 -#define RF_DAGH_INC 16 -#define RF_DAGH_INITIAL 
32 - -static void rf_ShutdownDAGs(void *); -static void -rf_ShutdownDAGs(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_dagh_freelist, next, (RF_DagHeader_t *)); -} - -int -rf_ConfigureDAGs(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH, - RF_DAGH_INC, sizeof(RF_DagHeader_t)); - if (rf_dagh_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownDAGs(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL, next, - (RF_DagHeader_t *)); - return (0); -} - -RF_DagHeader_t * -rf_AllocDAGHeader() -{ - RF_DagHeader_t *dh; - - RF_FREELIST_GET(rf_dagh_freelist, dh, next, (RF_DagHeader_t *)); - if (dh) { - bzero((char *) dh, sizeof(RF_DagHeader_t)); - } - return (dh); -} - -void -rf_FreeDAGHeader(RF_DagHeader_t * dh) -{ - RF_FREELIST_FREE(rf_dagh_freelist, dh, next); -} -/* allocates a buffer big enough to hold the data described by pda */ -void * -rf_AllocBuffer( - RF_Raid_t * raidPtr, - RF_DagHeader_t * dag_h, - RF_PhysDiskAddr_t * pda, - RF_AllocListElem_t * allocList) -{ - char *p; - - RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector, - (char *), allocList); - return ((void *) p); -} -/****************************************************************************** - * - * debug routines - * - *****************************************************************************/ - -char * -rf_NodeStatusString(RF_DagNode_t * node) -{ - switch (node->status) { - case rf_wait:return ("wait"); - case rf_fired: - return ("fired"); - case rf_good: - return ("good"); - case rf_bad: - return ("bad"); - default: - return ("?"); - } -} - -void -rf_PrintNodeInfoString(RF_DagNode_t * node) -{ - RF_PhysDiskAddr_t *pda; - int (*df) (RF_DagNode_t *) = node->doFunc; - int i, lk, unlk; - void *bufPtr; - - if ((df == rf_DiskReadFunc) 
|| (df == rf_DiskWriteFunc) - || (df == rf_DiskReadMirrorIdleFunc) - || (df == rf_DiskReadMirrorPartitionFunc)) { - pda = (RF_PhysDiskAddr_t *) node->params[0].p; - bufPtr = (void *) node->params[1].p; - lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - RF_ASSERT(!(lk && unlk)); - printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, pda->col, - (long) pda->startSector, (int) pda->numSector, (long) bufPtr, - (lk) ? "LOCK" : ((unlk) ? "UNLK" : " ")); - return; - } - if (df == rf_DiskUnlockFunc) { - pda = (RF_PhysDiskAddr_t *) node->params[0].p; - lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - RF_ASSERT(!(lk && unlk)); - printf("r %d c %d %s\n", pda->row, pda->col, - (lk) ? "LOCK" : ((unlk) ? "UNLK" : "nop")); - return; - } - if ((df == rf_SimpleXorFunc) || (df == rf_RegularXorFunc) - || (df == rf_RecoveryXorFunc)) { - printf("result buf 0x%lx\n", (long) node->results[0]); - for (i = 0; i < node->numParams - 1; i += 2) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p; - printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n", - (long) bufPtr, pda->row, pda->col, - (long) pda->startSector, (int) pda->numSector); - } - return; - } -#if RF_INCLUDE_PARITYLOGGING > 0 - if (df == rf_ParityLogOverwriteFunc || df == rf_ParityLogUpdateFunc) { - for (i = 0; i < node->numParams - 1; i += 2) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p; - printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n", - pda->row, pda->col, (long) pda->startSector, - (int) pda->numSector, (long) bufPtr); - } - return; - } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - - if ((df == rf_TerminateFunc) || (df == rf_NullNodeFunc)) { - printf("\n"); - return; - } - printf("?\n"); -} - -static void -rf_RecurPrintDAG(node, depth, unvisited) - RF_DagNode_t *node; - int depth; - int unvisited; -{ - char 
*anttype; - int i; - - node->visited = (unvisited) ? 0 : 1; - printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth, - node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node), - node->numSuccedents, node->numSuccFired, node->numSuccDone, - node->numAntecedents, node->numAntDone, node->numParams, node->numResults); - for (i = 0; i < node->numSuccedents; i++) { - printf("%d%s", node->succedents[i]->nodeNum, - ((i == node->numSuccedents - 1) ? "\0" : " ")); - } - printf("} A{"); - for (i = 0; i < node->numAntecedents; i++) { - switch (node->antType[i]) { - case rf_trueData: - anttype = "T"; - break; - case rf_antiData: - anttype = "A"; - break; - case rf_outputData: - anttype = "O"; - break; - case rf_control: - anttype = "C"; - break; - default: - anttype = "?"; - break; - } - printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, (i == node->numAntecedents - 1) ? "\0" : " "); - } - printf("}; "); - rf_PrintNodeInfoString(node); - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) - rf_RecurPrintDAG(node->succedents[i], depth + 1, unvisited); - } -} - -static void -rf_PrintDAG(dag_h) - RF_DagHeader_t *dag_h; -{ - int unvisited, i; - char *status; - - /* set dag status */ - switch (dag_h->status) { - case rf_enable: - status = "enable"; - break; - case rf_rollForward: - status = "rollForward"; - break; - case rf_rollBackward: - status = "rollBackward"; - break; - default: - status = "illegal!"; - break; - } - /* find out if visited bits are currently set or clear */ - unvisited = dag_h->succedents[0]->visited; - - printf("DAG type: %s\n", dag_h->creator); - printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n"); - printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum, - status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits); - for (i = 0; i < dag_h->numSuccedents; i++) { - printf("%d%s", 
dag_h->succedents[i]->nodeNum, - ((i == dag_h->numSuccedents - 1) ? "\0" : " ")); - } - printf("};\n"); - for (i = 0; i < dag_h->numSuccedents; i++) { - if (dag_h->succedents[i]->visited == unvisited) - rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited); - } -} -/* assigns node numbers */ -int -rf_AssignNodeNums(RF_DagHeader_t * dag_h) -{ - int unvisited, i, nnum; - RF_DagNode_t *node; - - nnum = 0; - unvisited = dag_h->succedents[0]->visited; - - dag_h->nodeNum = nnum++; - for (i = 0; i < dag_h->numSuccedents; i++) { - node = dag_h->succedents[i]; - if (node->visited == unvisited) { - nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited); - } - } - return (nnum); -} - -int -rf_RecurAssignNodeNums(node, num, unvisited) - RF_DagNode_t *node; - int num; - int unvisited; -{ - int i; - - node->visited = (unvisited) ? 0 : 1; - - node->nodeNum = num++; - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) { - num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited); - } - } - return (num); -} -/* set the header pointers in each node to "newptr" */ -void -rf_ResetDAGHeaderPointers(dag_h, newptr) - RF_DagHeader_t *dag_h; - RF_DagHeader_t *newptr; -{ - int i; - for (i = 0; i < dag_h->numSuccedents; i++) - if (dag_h->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr); -} - -void -rf_RecurResetDAGHeaderPointers(node, newptr) - RF_DagNode_t *node; - RF_DagHeader_t *newptr; -{ - int i; - node->dagHdr = newptr; - for (i = 0; i < node->numSuccedents; i++) - if (node->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr); -} - - -void -rf_PrintDAGList(RF_DagHeader_t * dag_h) -{ - int i = 0; - - for (; dag_h; dag_h = dag_h->next) { - rf_AssignNodeNums(dag_h); - printf("\n\nDAG %d IN LIST:\n", i++); - rf_PrintDAG(dag_h); - } -} - -static int -rf_ValidateBranch(node, scount, acount, nodes, unvisited) - RF_DagNode_t *node; - int 
*scount; - int *acount; - RF_DagNode_t **nodes; - int unvisited; -{ - int i, retcode = 0; - - /* construct an array of node pointers indexed by node num */ - node->visited = (unvisited) ? 0 : 1; - nodes[node->nodeNum] = node; - - if (node->next != NULL) { - printf("INVALID DAG: next pointer in node is not NULL\n"); - retcode = 1; - } - if (node->status != rf_wait) { - printf("INVALID DAG: Node status is not wait\n"); - retcode = 1; - } - if (node->numAntDone != 0) { - printf("INVALID DAG: numAntDone is not zero\n"); - retcode = 1; - } - if (node->doFunc == rf_TerminateFunc) { - if (node->numSuccedents != 0) { - printf("INVALID DAG: Terminator node has succedents\n"); - retcode = 1; - } - } else { - if (node->numSuccedents == 0) { - printf("INVALID DAG: Non-terminator node has no succedents\n"); - retcode = 1; - } - } - for (i = 0; i < node->numSuccedents; i++) { - if (!node->succedents[i]) { - printf("INVALID DAG: succedent %d of node %s is NULL\n", i, node->name); - retcode = 1; - } - scount[node->succedents[i]->nodeNum]++; - } - for (i = 0; i < node->numAntecedents; i++) { - if (!node->antecedents[i]) { - printf("INVALID DAG: antecedent %d of node %s is NULL\n", i, node->name); - retcode = 1; - } - acount[node->antecedents[i]->nodeNum]++; - } - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) { - if (rf_ValidateBranch(node->succedents[i], scount, - acount, nodes, unvisited)) { - retcode = 1; - } - } - } - return (retcode); -} - -static void -rf_ValidateBranchVisitedBits(node, unvisited, rl) - RF_DagNode_t *node; - int unvisited; - int rl; -{ - int i; - - RF_ASSERT(node->visited == unvisited); - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i] == NULL) { - printf("node=%lx node->succedents[%d] is NULL\n", (long) node, i); - RF_ASSERT(0); - } - rf_ValidateBranchVisitedBits(node->succedents[i], unvisited, rl + 1); - } -} -/* NOTE: never call this on a big dag, because it is exponential - * in execution 
time - */ -static void -rf_ValidateVisitedBits(dag) - RF_DagHeader_t *dag; -{ - int i, unvisited; - - unvisited = dag->succedents[0]->visited; - - for (i = 0; i < dag->numSuccedents; i++) { - if (dag->succedents[i] == NULL) { - printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i); - RF_ASSERT(0); - } - rf_ValidateBranchVisitedBits(dag->succedents[i], unvisited, 0); - } -} -/* validate a DAG. _at entry_ verify that: - * -- numNodesCompleted is zero - * -- node queue is null - * -- dag status is rf_enable - * -- next pointer is null on every node - * -- all nodes have status wait - * -- numAntDone is zero in all nodes - * -- terminator node has zero successors - * -- no other node besides terminator has zero successors - * -- no successor or antecedent pointer in a node is NULL - * -- number of times that each node appears as a successor of another node - * is equal to the antecedent count on that node - * -- number of times that each node appears as an antecedent of another node - * is equal to the succedent count on that node - * -- what else? 
- */ -int -rf_ValidateDAG(dag_h) - RF_DagHeader_t *dag_h; -{ - int i, nodecount; - int *scount, *acount;/* per-node successor and antecedent counts */ - RF_DagNode_t **nodes; /* array of ptrs to nodes in dag */ - int retcode = 0; - int unvisited; - int commitNodeCount = 0; - - if (rf_validateVisitedDebug) - rf_ValidateVisitedBits(dag_h); - - if (dag_h->numNodesCompleted != 0) { - printf("INVALID DAG: num nodes completed is %d, should be 0\n", dag_h->numNodesCompleted); - retcode = 1; - goto validate_dag_bad; - } - if (dag_h->status != rf_enable) { - printf("INVALID DAG: not enabled\n"); - retcode = 1; - goto validate_dag_bad; - } - if (dag_h->numCommits != 0) { - printf("INVALID DAG: numCommits != 0 (%d)\n", dag_h->numCommits); - retcode = 1; - goto validate_dag_bad; - } - if (dag_h->numSuccedents != 1) { - /* currently, all dags must have only one succedent */ - printf("INVALID DAG: numSuccedents !1 (%d)\n", dag_h->numSuccedents); - retcode = 1; - goto validate_dag_bad; - } - nodecount = rf_AssignNodeNums(dag_h); - - unvisited = dag_h->succedents[0]->visited; - - RF_Calloc(scount, nodecount, sizeof(int), (int *)); - RF_Calloc(acount, nodecount, sizeof(int), (int *)); - RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **)); - for (i = 0; i < dag_h->numSuccedents; i++) { - if ((dag_h->succedents[i]->visited == unvisited) - && rf_ValidateBranch(dag_h->succedents[i], scount, - acount, nodes, unvisited)) { - retcode = 1; - } - } - /* start at 1 to skip the header node */ - for (i = 1; i < nodecount; i++) { - if (nodes[i]->commitNode) - commitNodeCount++; - if (nodes[i]->doFunc == NULL) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); - retcode = 1; - goto validate_dag_out; - } - if (nodes[i]->undoFunc == NULL) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); - retcode = 1; - goto validate_dag_out; - } - if (nodes[i]->numAntecedents != scount[nodes[i]->nodeNum]) { - printf("INVALID DAG: node %s 
has %d antecedents but appears as a succedent %d times\n", - nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]); - retcode = 1; - goto validate_dag_out; - } - if (nodes[i]->numSuccedents != acount[nodes[i]->nodeNum]) { - printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n", - nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]); - retcode = 1; - goto validate_dag_out; - } - } - - if (dag_h->numCommitNodes != commitNodeCount) { - printf("INVALID DAG: incorrect commit node count. hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n", - dag_h->numCommitNodes, commitNodeCount); - retcode = 1; - goto validate_dag_out; - } -validate_dag_out: - RF_Free(scount, nodecount * sizeof(int)); - RF_Free(acount, nodecount * sizeof(int)); - RF_Free(nodes, nodecount * sizeof(RF_DagNode_t *)); - if (retcode) - rf_PrintDAGList(dag_h); - - if (rf_validateVisitedDebug) - rf_ValidateVisitedBits(dag_h); - - return (retcode); - -validate_dag_bad: - rf_PrintDAGList(dag_h); - return (retcode); -} - - -/****************************************************************************** - * - * misc construction routines - * - *****************************************************************************/ - -void -rf_redirect_asm( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap) -{ - int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 
1 : 0; - int row = asmap->physInfo->row; - int fcol = raidPtr->reconControl[row]->fcol; - int srow = raidPtr->reconControl[row]->spareRow; - int scol = raidPtr->reconControl[row]->spareCol; - RF_PhysDiskAddr_t *pda; - - RF_ASSERT(raidPtr->status[row] == rf_rs_reconstructing); - for (pda = asmap->physInfo; pda; pda = pda->next) { - if (pda->col == fcol) { - if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, - pda->startSector)) { - RF_PANIC(); - } - } - /* printf("Remapped data for large write\n"); */ - if (ds) { - raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress, - &pda->row, &pda->col, &pda->startSector, RF_REMAP); - } else { - pda->row = srow; - pda->col = scol; - } - } - } - for (pda = asmap->parityInfo; pda; pda = pda->next) { - if (pda->col == fcol) { - if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) { - RF_PANIC(); - } - } - } - if (ds) { - (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - } else { - pda->row = srow; - pda->col = scol; - } - } -} - - -/* this routine allocates read buffers and generates stripe maps for the - * regions of the array from the start of the stripe to the start of the - * access, and from the end of the access to the end of the stripe. It also - * computes and returns the number of DAG nodes needed to read all this data. - * Note that this routine does the wrong thing if the access is fully - * contained within one stripe unit, so we RF_ASSERT against this case at the - * start. 
- */ -void -rf_MapUnaccessedPortionOfStripe( - RF_Raid_t * raidPtr, - RF_RaidLayout_t * layoutPtr,/* in: layout information */ - RF_AccessStripeMap_t * asmap, /* in: access stripe map */ - RF_DagHeader_t * dag_h, /* in: header of the dag to create */ - RF_AccessStripeMapHeader_t ** new_asm_h, /* in: ptr to array of 2 - * headers, to be filled in */ - int *nRodNodes, /* out: num nodes to be generated to read - * unaccessed data */ - char **sosBuffer, /* out: pointers to newly allocated buffer */ - char **eosBuffer, - RF_AllocListElem_t * allocList) -{ - RF_RaidAddr_t sosRaidAddress, eosRaidAddress; - RF_SectorNum_t sosNumSector, eosNumSector; - - RF_ASSERT(asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol / 2)); - /* generate an access map for the region of the array from start of - * stripe to start of access */ - new_asm_h[0] = new_asm_h[1] = NULL; - *nRodNodes = 0; - if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) { - sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - sosNumSector = asmap->raidAddress - sosRaidAddress; - RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList); - new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP); - new_asm_h[0]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[0]; - *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - - RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL); - /* we're totally within one stripe here */ - if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) - rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap); - } - /* generate an access map for the region of the array from end of - * access to end of stripe */ - if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) { - eosRaidAddress = asmap->endRaidAddress; - eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress; - RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, 
eosNumSector), (char *), allocList); - new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP); - new_asm_h[1]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[1]; - *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - - RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL); - /* we're totally within one stripe here */ - if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) - rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap); - } -} - - - -/* returns non-zero if the indicated ranges of stripe unit offsets overlap */ -int -rf_PDAOverlap( - RF_RaidLayout_t * layoutPtr, - RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest) -{ - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); - /* use -1 to be sure we stay within SU */ - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); - RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); - return ((RF_MAX(soffs, doffs) <= RF_MIN(send, dend)) ? 1 : 0); -} - - -/* GenerateFailedAccessASMs - * - * this routine figures out what portion of the stripe needs to be read - * to effect the degraded read or write operation. It's primary function - * is to identify everything required to recover the data, and then - * eliminate anything that is already being accessed by the user. - * - * The main result is two new ASMs, one for the region from the start of the - * stripe to the start of the access, and one for the region from the end of - * the access to the end of the stripe. These ASMs describe everything that - * needs to be read to effect the degraded access. Other results are: - * nXorBufs -- the total number of buffers that need to be XORed together to - * recover the lost data, - * rpBufPtr -- ptr to a newly-allocated buffer to hold the parity. If NULL - * at entry, not allocated. 
- * overlappingPDAs -- - * describes which of the non-failed PDAs in the user access - * overlap data that needs to be read to effect recovery. - * overlappingPDAs[i]==1 if and only if, neglecting the failed - * PDA, the ith pda in the input asm overlaps data that needs - * to be read for recovery. - */ - /* in: asm - ASM for the actual access, one stripe only */ - /* in: faildPDA - which component of the access has failed */ - /* in: dag_h - header of the DAG we're going to create */ - /* out: new_asm_h - the two new ASMs */ - /* out: nXorBufs - the total number of xor bufs required */ - /* out: rpBufPtr - a buffer for the parity read */ -void -rf_GenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t * failedPDA, - RF_DagHeader_t * dag_h, - RF_AccessStripeMapHeader_t ** new_asm_h, - int *nXorBufs, - char **rpBufPtr, - char *overlappingPDAs, - RF_AllocListElem_t * allocList) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - - /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */ - RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr; - - RF_SectorCount_t numSect[2], numParitySect; - RF_PhysDiskAddr_t *pda; - char *rdBuf, *bufP; - int foundit, i; - - bufP = NULL; - foundit = 0; - /* first compute the following raid addresses: start of stripe, - * (sosAddr) MIN(start of access, start of failed SU), (sosEndAddr) - * MAX(end of access, end of failed SU), (eosStartAddr) end of - * stripe (i.e. 
start of next stripe) (eosAddr) */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); - eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); - eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress); - - /* now generate access stripe maps for each of the above regions of - * the stripe. Use a dummy (NULL) buf ptr for now */ - - new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL, RF_DONT_REMAP) : NULL; - new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL, RF_DONT_REMAP) : NULL; - - /* walk through the PDAs and range-restrict each SU to the region of - * the SU touched on the failed PDA. also compute total data buffer - * space requirements in this step. Ignore the parity for now. */ - - numSect[0] = numSect[1] = 0; - if (new_asm_h[0]) { - new_asm_h[0]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[0]; - for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); - numSect[0] += pda->numSector; - } - } - if (new_asm_h[1]) { - new_asm_h[1]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[1]; - for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); - numSect[1] += pda->numSector; - } - } - numParitySect = failedPDA->numSector; - - /* allocate buffer space for the data & parity we have to read to - * recover from the failure */ - - if (numSect[0] + numSect[1] + ((rpBufPtr) ? 
numParitySect : 0)) { /* don't allocate parity - * buf if not needed */ - RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (char *), allocList); - bufP = rdBuf; - if (rf_degDagDebug) - printf("Newly allocated buffer (%d bytes) is 0x%lx\n", - (int) rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (unsigned long) bufP); - } - /* now walk through the pdas one last time and assign buffer pointers - * (ugh!). Again, ignore the parity. also, count nodes to find out - * how many bufs need to be xored together */ - (*nXorBufs) = 1; /* in read case, 1 is for parity. In write - * case, 1 is for failed data */ - if (new_asm_h[0]) { - for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { - pda->bufPtr = bufP; - bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); - } - *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - } - if (new_asm_h[1]) { - for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { - pda->bufPtr = bufP; - bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); - } - (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - } - if (rpBufPtr) - *rpBufPtr = bufP; /* the rest of the buffer is for - * parity */ - - /* the last step is to figure out how many more distinct buffers need - * to get xor'd to produce the missing unit. 
there's one for each - * user-data read node that overlaps the portion of the failed unit - * being accessed */ - - for (foundit = i = 0, pda = asmap->physInfo; pda; i++, pda = pda->next) { - if (pda == failedPDA) { - i--; - foundit = 1; - continue; - } - if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) { - overlappingPDAs[i] = 1; - (*nXorBufs)++; - } - } - if (!foundit) { - RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); - RF_ASSERT(0); - } - if (rf_degDagDebug) { - if (new_asm_h[0]) { - printf("First asm:\n"); - rf_PrintFullAccessStripeMap(new_asm_h[0], 1); - } - if (new_asm_h[1]) { - printf("Second asm:\n"); - rf_PrintFullAccessStripeMap(new_asm_h[1], 1); - } - } -} - - -/* adjusts the offset and number of sectors in the destination pda so that - * it covers at most the region of the SU covered by the source PDA. This - * is exclusively a restriction: the number of sectors indicated by the - * target PDA can only shrink. - * - * For example: s = sectors within SU indicated by source PDA - * d = sectors within SU indicated by dest PDA - * r = results, stored in dest PDA - * - * |--------------- one stripe unit ---------------------| - * | sssssssssssssssssssssssssssssssss | - * | ddddddddddddddddddddddddddddddddddddddddddddd | - * | rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr | - * - * Another example: - * - * |--------------- one stripe unit ---------------------| - * | sssssssssssssssssssssssssssssssss | - * | ddddddddddddddddddddddd | - * | rrrrrrrrrrrrrrrr | - * - */ -void -rf_RangeRestrictPDA( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest, - int dobuffer, - int doraidaddr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); /* use -1 to be sure we - * stay within SU */ 
- RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); - RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */ - - dest->startSector = subAddr + RF_MAX(soffs, doffs); - dest->numSector = subAddr + RF_MIN(send, dend) + 1 - dest->startSector; - - if (dobuffer) - dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0; - if (doraidaddr) { - dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) + - rf_StripeUnitOffset(layoutPtr, dest->startSector); - } -} -/* - * Want the highest of these primes to be the largest one - * less than the max expected number of columns (won't hurt - * to be too small or too large, but won't be optimal, either) - * --jimz - */ -#define NLOWPRIMES 8 -static int lowprimes[NLOWPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19}; -/***************************************************************************** - * compute the workload shift factor. (chained declustering) - * - * return nonzero if access should shift to secondary, otherwise, - * access is to primary - *****************************************************************************/ -int -rf_compute_workload_shift( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * pda) -{ - /* - * variables: - * d = column of disk containing primary - * f = column of failed disk - * n = number of disks in array - * sd = "shift distance" (number of columns that d is to the right of f) - * row = row of array the access is in - * v = numerator of redirection ratio - * k = denominator of redirection ratio - */ - RF_RowCol_t d, f, sd, row, n; - int k, v, ret, i; - - row = pda->row; - n = raidPtr->numCol; - - /* assign column of primary copy to d */ - d = pda->col; - - /* assign column of dead disk to f */ - for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) && (f < n)); f++); - - RF_ASSERT(f < n); - RF_ASSERT(f != d); - - sd = (f > d) ? 
(n + d - f) : (d - f); - RF_ASSERT(sd < n); - - /* - * v of every k accesses should be redirected - * - * v/k := (n-1-sd)/(n-1) - */ - v = (n - 1 - sd); - k = (n - 1); - -#if 1 - /* - * XXX - * Is this worth it? - * - * Now reduce the fraction, by repeatedly factoring - * out primes (just like they teach in elementary school!) - */ - for (i = 0; i < NLOWPRIMES; i++) { - if (lowprimes[i] > v) - break; - while (((v % lowprimes[i]) == 0) && ((k % lowprimes[i]) == 0)) { - v /= lowprimes[i]; - k /= lowprimes[i]; - } - } -#endif - - raidPtr->hist_diskreq[row][d]++; - if (raidPtr->hist_diskreq[row][d] > v) { - ret = 0; /* do not redirect */ - } else { - ret = 1; /* redirect */ - } - -#if 0 - printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret, - raidPtr->hist_diskreq[row][d]); -#endif - - if (raidPtr->hist_diskreq[row][d] >= k) { - /* reset counter */ - raidPtr->hist_diskreq[row][d] = 0; - } - return (ret); -} -/* - * Disk selection routines - */ - -/* - * Selects the disk with the shortest queue from a mirror pair. 
- * Both the disk I/Os queued in RAIDframe as well as those at the physical - * disk are counted as members of the "queue" - */ -void -rf_SelectMirrorDiskIdle(RF_DagNode_t * node) -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; - RF_RowCol_t rowData, colData, rowMirror, colMirror; - int dataQueueLength, mirrorQueueLength, usemirror; - RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p; - RF_PhysDiskAddr_t *tmp_pda; - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - - /* return the [row col] of the disk with the shortest queue */ - rowData = data_pda->row; - colData = data_pda->col; - rowMirror = mirror_pda->row; - colMirror = mirror_pda->col; - dataQueue = &(dqs[rowData][colData]); - mirrorQueue = &(dqs[rowMirror][colMirror]); - -#ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding; -#ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); - RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding; -#ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - - usemirror = 0; - if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { - usemirror = 0; - } else - if (RF_DEAD_DISK(disks[rowData][colData].status)) { - usemirror = 1; - } else - if (raidPtr->parity_good == RF_RAID_DIRTY) { - /* Trust only the main disk */ - usemirror = 0; - } else - if (dataQueueLength < mirrorQueueLength) { - usemirror = 0; - } else - if (mirrorQueueLength < dataQueueLength) { - usemirror = 1; - } else { - /* queues are equal length. 
attempt - * cleverness. */ - if (SNUM_DIFF(dataQueue->last_deq_sector, data_pda->startSector) - <= SNUM_DIFF(mirrorQueue->last_deq_sector, mirror_pda->startSector)) { - usemirror = 0; - } else { - usemirror = 1; - } - } - - if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ - tmp_pda = data_pda; - node->params[0].p = mirror_pda; - node->params[4].p = tmp_pda; - } else { - /* use data disk, leave param 0 unchanged */ - } - /* printf("dataQueueLength %d, mirrorQueueLength - * %d\n",dataQueueLength, mirrorQueueLength); */ -} -/* - * Do simple partitioning. This assumes that - * the data and parity disks are laid out identically. - */ -void -rf_SelectMirrorDiskPartition(RF_DagNode_t * node) -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; - RF_RowCol_t rowData, colData, rowMirror, colMirror; - RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p; - RF_PhysDiskAddr_t *tmp_pda; - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - int usemirror; - - /* return the [row col] of the disk with the shortest queue */ - rowData = data_pda->row; - colData = data_pda->col; - rowMirror = mirror_pda->row; - colMirror = mirror_pda->col; - dataQueue = &(dqs[rowData][colData]); - mirrorQueue = &(dqs[rowMirror][colMirror]); - - usemirror = 0; - if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { - usemirror = 0; - } else - if (RF_DEAD_DISK(disks[rowData][colData].status)) { - usemirror = 1; - } else - if (raidPtr->parity_good == RF_RAID_DIRTY) { - /* Trust only the main disk */ - usemirror = 0; - } else - if (data_pda->startSector < - (disks[rowData][colData].numBlocks / 2)) { - usemirror = 0; - } else { - usemirror = 1; - } - - if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ - tmp_pda = data_pda; - node->params[0].p = mirror_pda; - node->params[4].p = tmp_pda; - } else { - /* use 
data disk, leave param 0 unchanged */ - } -} diff --git a/sys/dev/raidframe/rf_dagutils.h b/sys/dev/raidframe/rf_dagutils.h deleted file mode 100644 index bad2c76..0000000 --- a/sys/dev/raidframe/rf_dagutils.h +++ /dev/null @@ -1,121 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagutils.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/************************************************************************* - * - * rf_dagutils.h -- header file for utility routines for manipulating DAGs - * - *************************************************************************/ - - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> - -#ifndef _RF__RF_DAGUTILS_H_ -#define _RF__RF_DAGUTILS_H_ - -struct RF_RedFuncs_s { - int (*regular) (RF_DagNode_t *); - char *RegularName; - int (*simple) (RF_DagNode_t *); - char *SimpleName; -}; - -extern RF_RedFuncs_t rf_xorFuncs; -extern RF_RedFuncs_t rf_xorRecoveryFuncs; - -void -rf_InitNode(RF_DagNode_t * node, RF_NodeStatus_t initstatus, - int commit, - int (*doFunc) (RF_DagNode_t * node), - int (*undoFunc) (RF_DagNode_t * node), - int (*wakeFunc) (RF_DagNode_t * node, int status), - int nSucc, int nAnte, int nParam, int nResult, - RF_DagHeader_t * hdr, char *name, RF_AllocListElem_t * alist); - - void rf_FreeDAG(RF_DagHeader_t * dag_h); - - RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t * dag_h, int resultNum, - int paramNum, RF_PropHeader_t * next, RF_AllocListElem_t * allocList); - - int rf_ConfigureDAGs(RF_ShutdownList_t ** listp); - - RF_DagHeader_t *rf_AllocDAGHeader(void); - - void rf_FreeDAGHeader(RF_DagHeader_t * dh); - - void *rf_AllocBuffer(RF_Raid_t * raidPtr, RF_DagHeader_t * dag_h, - RF_PhysDiskAddr_t * pda, RF_AllocListElem_t * allocList); - - char *rf_NodeStatusString(RF_DagNode_t * node); - - void rf_PrintNodeInfoString(RF_DagNode_t * node); - - int rf_AssignNodeNums(RF_DagHeader_t * dag_h); - - int rf_RecurAssignNodeNums(RF_DagNode_t * node, int num, int unvisited); - - void rf_ResetDAGHeaderPointers(RF_DagHeader_t * dag_h, RF_DagHeader_t * newptr); - - void rf_RecurResetDAGHeaderPointers(RF_DagNode_t * node, RF_DagHeader_t * newptr); - - void rf_PrintDAGList(RF_DagHeader_t * dag_h); - - int rf_ValidateDAG(RF_DagHeader_t * dag_h); - - void rf_redirect_asm(RF_Raid_t * 
raidPtr, RF_AccessStripeMap_t * asmap); - - void rf_MapUnaccessedPortionOfStripe(RF_Raid_t * raidPtr, - RF_RaidLayout_t * layoutPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - RF_AccessStripeMapHeader_t ** new_asm_h, int *nRodNodes, char **sosBuffer, - char **eosBuffer, RF_AllocListElem_t * allocList); - - int rf_PDAOverlap(RF_RaidLayout_t * layoutPtr, RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest); - - void rf_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t * failedPDA, - RF_DagHeader_t * dag_h, RF_AccessStripeMapHeader_t ** new_asm_h, - int *nXorBufs, char **rpBufPtr, char *overlappingPDAs, - RF_AllocListElem_t * allocList); - -/* flags used by RangeRestrictPDA */ -#define RF_RESTRICT_NOBUFFER 0 -#define RF_RESTRICT_DOBUFFER 1 - - void rf_RangeRestrictPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest, int dobuffer, int doraidaddr); - - int rf_compute_workload_shift(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda); - void rf_SelectMirrorDiskIdle(RF_DagNode_t * node); - void rf_SelectMirrorDiskPartition(RF_DagNode_t * node); - -#endif /* !_RF__RF_DAGUTILS_H_ */ diff --git a/sys/dev/raidframe/rf_debugMem.c b/sys/dev/raidframe/rf_debugMem.c deleted file mode 100644 index a138021..0000000 --- a/sys/dev/raidframe/rf_debugMem.c +++ /dev/null @@ -1,208 +0,0 @@ -/* $NetBSD: rf_debugMem.c,v 1.7 2000/01/07 03:40:59 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky, Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* debugMem.c: memory usage debugging stuff. - * Malloc, Calloc, and Free are #defined everywhere - * to do_malloc, do_calloc, and do_free. - * - * if RF_UTILITY is nonzero, it means were compiling one of the - * raidframe utility programs, such as rfctrl or smd. In this - * case, we eliminate all references to the threads package - * and to the allocation list stuff. - */ - -#include <dev/raidframe/rf_types.h> - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> - -#if defined(__FreeBSD__) -#include <sys/kernel.h> -MALLOC_DEFINE(M_RAIDFRAME, "rfbuf", "Buffers for RAIDframe operation"); -#endif - -static long tot_mem_in_use = 0; - -/* Hash table of information about memory allocations */ -#define RF_MH_TABLESIZE 1000 - -struct mh_struct { - void *address; - int size; - int line; - char *filen; - char allocated; - struct mh_struct *next; -}; -static struct mh_struct *mh_table[RF_MH_TABLESIZE]; -RF_DECLARE_MUTEX(rf_debug_mem_mutex) - static int mh_table_initialized = 0; - - static void memory_hash_insert(void *addr, int size, int line, char *filen); - static int memory_hash_remove(void *addr, int sz); - -void -rf_record_malloc(p, size, line, filen) - void *p; - int size, line; - char *filen; -{ - RF_ASSERT(size != 0); - - /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ - memory_hash_insert(p, size, 
line, filen); - tot_mem_in_use += size; - /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ - if ((long) p == rf_memDebugAddress) { - printf("Allocate: debug address allocated from line %d file %s\n", line, filen); - } -} - -void -rf_unrecord_malloc(p, sz) - void *p; - int sz; -{ - int size; - - /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ - size = memory_hash_remove(p, sz); - tot_mem_in_use -= size; - /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ - if ((long) p == rf_memDebugAddress) { - printf("Free: Found debug address\n"); /* this is really only a - * flag line for gdb */ - } -} - -void -rf_print_unfreed() -{ - int i, foundone = 0; - struct mh_struct *p; - - for (i = 0; i < RF_MH_TABLESIZE; i++) { - for (p = mh_table[i]; p; p = p->next) - if (p->allocated) { - if (!foundone) - printf("\n\nThere are unfreed memory locations at program shutdown:\n"); - foundone = 1; - printf("Addr 0x%lx Size %d line %d file %s\n", - (long) p->address, p->size, p->line, p->filen); - } - } - if (tot_mem_in_use) { - printf("%ld total bytes in use\n", tot_mem_in_use); - } -} - -int -rf_ConfigureDebugMem(listp) - RF_ShutdownList_t **listp; -{ - int i, rc; - - rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - if (rf_memDebug) { - for (i = 0; i < RF_MH_TABLESIZE; i++) - mh_table[i] = NULL; - mh_table_initialized = 1; - } - return (0); -} -#define HASHADDR(_a_) ( (((unsigned long) _a_)>>3) % RF_MH_TABLESIZE ) - -static void -memory_hash_insert(addr, size, line, filen) - void *addr; - int size, line; - char *filen; -{ - unsigned long bucket = HASHADDR(addr); - struct mh_struct *p; - - RF_ASSERT(mh_table_initialized); - - /* search for this address in the hash table */ - for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); - if (!p) { - RF_Malloc(p, sizeof(struct mh_struct), (struct mh_struct *)); - RF_ASSERT(p); - p->next = mh_table[bucket]; - 
mh_table[bucket] = p; - p->address = addr; - p->allocated = 0; - } - if (p->allocated) { - printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n", (long) addr, line, filen); - printf(" last allocated from line %d file %s\n", p->line, p->filen); - RF_ASSERT(0); - } - p->size = size; - p->line = line; - p->filen = filen; - p->allocated = 1; -} - -static int -memory_hash_remove(addr, sz) - void *addr; - int sz; -{ - unsigned long bucket = HASHADDR(addr); - struct mh_struct *p; - - RF_ASSERT(mh_table_initialized); - for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); - if (!p) { - printf("ERROR: freeing never-allocated address 0x%lx\n", (long) addr); - RF_PANIC(); - } - if (!p->allocated) { - printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n", (long) addr, p->line, p->filen); - RF_PANIC(); - } - if (sz > 0 && p->size != sz) { /* you can suppress this error by - * using a negative value as the size - * to free */ - printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. Alloc at line %d of file %s\n", (unsigned long) addr, sz, p->size, p->line, p->filen); - RF_PANIC(); - } - p->allocated = 0; - return (p->size); -} diff --git a/sys/dev/raidframe/rf_debugMem.h b/sys/dev/raidframe/rf_debugMem.h deleted file mode 100644 index e6d8c60..0000000 --- a/sys/dev/raidframe/rf_debugMem.h +++ /dev/null @@ -1,88 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_debugMem.h,v 1.7 1999/09/05 01:58:11 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky, Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_debugMem.h -- memory leak debugging module - * - * IMPORTANT: if you put the lock/unlock mutex stuff back in here, you - * need to take it out of the routines in debugMem.c - * - */ - -#ifndef _RF__RF_DEBUGMEM_H_ -#define _RF__RF_DEBUGMEM_H_ - -#include <dev/raidframe/rf_alloclist.h> - -#ifdef _KERNEL -#include <sys/types.h> -#include <sys/malloc.h> - -#if defined(__FreeBSD__) -MALLOC_DECLARE(M_RAIDFRAME); -#endif - -#define RF_Malloc(_p_, _size_, _cast_) \ - { \ - _p_ = _cast_ malloc((u_long)_size_, M_RAIDFRAME, M_NOWAIT | M_ZERO); \ - if (_p_ == NULL) panic("out of memory\n"); \ - if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \ - } - -#define RF_MallocAndAdd(__p_, __size_, __cast_, __alist_) \ - { \ - RF_Malloc(__p_, __size_, __cast_); \ - if (__alist_) rf_AddToAllocList(__alist_, __p_, __size_); \ - } - -#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) \ - { \ - RF_Malloc( _p_, (_nel_) * (_elsz_), _cast_); \ - } - -#define RF_CallocAndAdd(__p,__nel,__elsz,__cast,__alist) \ - { \ - RF_Calloc(__p, __nel, __elsz, __cast); \ - if (__alist) rf_AddToAllocList(__alist, __p, (__nel)*(__elsz)); \ - } - -#define RF_Free(_p_, _sz_) \ - { \ - free((void *)(_p_), M_RAIDFRAME); \ - if (rf_memDebug) rf_unrecord_malloc(_p_, (u_int32_t) (_sz_)); \ - } - -#endif /* _KERNEL */ - -void rf_record_malloc(void *p, int size, int line, char *filen); -void rf_unrecord_malloc(void *p, int sz); 
-void rf_print_unfreed(void); -int rf_ConfigureDebugMem(RF_ShutdownList_t ** listp); - -#endif /* !_RF__RF_DEBUGMEM_H_ */ diff --git a/sys/dev/raidframe/rf_debugprint.c b/sys/dev/raidframe/rf_debugprint.c deleted file mode 100644 index 02adee7..0000000 --- a/sys/dev/raidframe/rf_debugprint.c +++ /dev/null @@ -1,136 +0,0 @@ -/* $NetBSD: rf_debugprint.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Code to do debug printfs. Calls to rf_debug_printf cause the corresponding - * information to be printed to a circular buffer rather than the screen. - * The point is to try and minimize the timing variations induced by the - * printfs, and to capture only the printf's immediately preceding a failure. 
- */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> - -#include <sys/param.h> - -struct RF_Entry_s { - char *cstring; - void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; -}; -/* space for 1k lines */ -#define BUFSHIFT 10 -#define BUFSIZE (1<<BUFSHIFT) -#define BUFMASK (BUFSIZE-1) - -static struct RF_Entry_s rf_debugprint_buf[BUFSIZE]; -static int rf_debugprint_index = 0; -RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex) - int rf_ConfigureDebugPrint(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rf_clear_debug_print_buffer(); - return (0); -} - -void -rf_clear_debug_print_buffer() -{ - int i; - - for (i = 0; i < BUFSIZE; i++) - rf_debugprint_buf[i].cstring = NULL; - rf_debugprint_index = 0; -} - -void -rf_debug_printf(s, a1, a2, a3, a4, a5, a6, a7, a8) - char *s; - void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; -{ - int idx; - - if (rf_debugPrintUseBuffer) { - - RF_LOCK_MUTEX(rf_debug_print_mutex); - idx = rf_debugprint_index; - rf_debugprint_index = (rf_debugprint_index + 1) & BUFMASK; - RF_UNLOCK_MUTEX(rf_debug_print_mutex); - - rf_debugprint_buf[idx].cstring = s; - rf_debugprint_buf[idx].a1 = a1; - rf_debugprint_buf[idx].a2 = a2; - rf_debugprint_buf[idx].a3 = a3; - rf_debugprint_buf[idx].a4 = a4; - rf_debugprint_buf[idx].a5 = a5; - rf_debugprint_buf[idx].a6 = a6; - rf_debugprint_buf[idx].a7 = a7; - rf_debugprint_buf[idx].a8 = a8; - } else { - printf(s, a1, a2, a3, a4, a5, a6, a7, a8); - } -} - -void -rf_print_debug_buffer() -{ - rf_spill_debug_buffer(NULL); -} - -void -rf_spill_debug_buffer(fname) - char *fname; -{ - int i; - - if (!rf_debugPrintUseBuffer) - return; - - RF_LOCK_MUTEX(rf_debug_print_mutex); - - for (i = 
rf_debugprint_index + 1; i != rf_debugprint_index; i = (i + 1) & BUFMASK) - if (rf_debugprint_buf[i].cstring) - printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); - printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); - RF_UNLOCK_MUTEX(rf_debug_print_mutex); -} diff --git a/sys/dev/raidframe/rf_debugprint.h b/sys/dev/raidframe/rf_debugprint.h deleted file mode 100644 index 318f620..0000000 --- a/sys/dev/raidframe/rf_debugprint.h +++ /dev/null @@ -1,44 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_debugprint.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * rf_debugprint.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#ifndef _RF__RF_DEBUGPRINT_H_ -#define _RF__RF_DEBUGPRINT_H_ - -int rf_ConfigureDebugPrint(RF_ShutdownList_t ** listp); -void rf_clear_debug_print_buffer(void); -void -rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4, - void *a5, void *a6, void *a7, void *a8); -void rf_print_debug_buffer(void); -void rf_spill_debug_buffer(char *fname); - -#endif /* !_RF__RF_DEBUGPRINT_H_ */ diff --git a/sys/dev/raidframe/rf_decluster.c b/sys/dev/raidframe/rf_decluster.c deleted file mode 100644 index 646a5ad..0000000 --- a/sys/dev/raidframe/rf_decluster.c +++ /dev/null @@ -1,747 +0,0 @@ -/* $NetBSD: rf_decluster.c,v 1.6 2001/01/26 04:40:03 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*---------------------------------------------------------------------- - * - * rf_decluster.c -- code related to the declustered layout - * - * Created 10-21-92 (MCH) - * - * Nov 93: adding support for distributed sparing. 
This code is a little - * complex: the basic layout used is as follows: - * let F = (v-1)/GCD(r,v-1). The spare space for each set of - * F consecutive fulltables is grouped together and placed after - * that set of tables. - * +------------------------------+ - * | F fulltables | - * | Spare Space | - * | F fulltables | - * | Spare Space | - * | ... | - * +------------------------------+ - * - *--------------------------------------------------------------------*/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raidframe.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_shutdown.h> - - -extern int rf_copyback_in_progress; /* debug only */ - -/* found in rf_kintf.c */ -extern int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); - -#if (RF_INCLUDE_PARITY_DECLUSTERING > 0) || (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) - -/* configuration code */ - -int -rf_ConfigureDeclustered( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j; - RF_RowCol_t *first_avail_slot; - RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk; - RF_DeclusteredConfigInfo_t *info; - RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, - extraPUsPerDisk; - RF_StripeCount_t totSparePUsPerDisk; - RF_SectorNum_t diskOffsetOfLastFullTableInSUs; - RF_SectorCount_t SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); - RF_StripeNum_t l, SUID; - - SUID = l = 0; - numCompleteSpareRegionsPerDisk = 0; - - /* 1. 
create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - info->SpareTable = NULL; - - /* 2. extract parameters from the config structure */ - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { - (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN); - } - cfgBuf += RF_SPAREMAP_NAME_LEN; - - b = *((int *) cfgBuf); - cfgBuf += sizeof(int); - v = *((int *) cfgBuf); - cfgBuf += sizeof(int); - k = *((int *) cfgBuf); - cfgBuf += sizeof(int); - r = *((int *) cfgBuf); - cfgBuf += sizeof(int); - lambda = *((int *) cfgBuf); - cfgBuf += sizeof(int); - raidPtr->noRotate = *((int *) cfgBuf); - cfgBuf += sizeof(int); - - /* the sparemaps are generated assuming that parity is rotated, so we - * issue a warning if both distributed sparing and no-rotate are on at - * the same time */ - if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); - } - if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); - return (EINVAL); - } - /* 3. 
set up the values used in the mapping code */ - info->BlocksPerTable = b; - info->Lambda = lambda; - info->NumParityReps = info->groupSize = k; - info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ - info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ - info->PUsPerBlock = k - 1; - info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; - info->TableDepthInPUs = (b * k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ - - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ - info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; - - /* check to make sure the block design is sufficiently small */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", - (int) info->FullTableDepthInPUs, - (int) info->SpareSpaceDepthPerRegionInSUs, - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", - (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), \ - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } - - - /* compute the size of each disk, and the number of tables in the last - * fulltable (which need not be complete) */ - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - 
(info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; - - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; - info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; - extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - - /* assume conservatively that we need the full amount of spare - * space in one region in order to provide spares for the - * partial spare region at the end of the array. We set "i" - * to the number of tables in the partial spare region. This - * may actually include some fulltables. */ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - if (extraPUsPerDisk <= 0) - i = 0; - else - i = extraPUsPerDisk / info->TableDepthInPUs; - - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = i % k; - - /* note that in the last spare region, the spare space is - * complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - info->TotSparePUsPerDisk = totSparePUsPerDisk; - - layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ - info->ExtraTablesPerDisk * info->TableDepthInPUs + - totSparePUsPerDisk /* spare space */ - ) * layoutPtr->SUsPerPU; - layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) - * layoutPtr->SUsPerPU * (k - 1) / k; - - } else { - /* non-dist spare case: force each disk to contain an - * integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * 
layoutPtr->SUsPerPU); - - /* compute the number of tables in the last fulltable, which - * need not be complete */ - complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; - - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; - } - - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - /* find the disk offset of the stripe unit where the last fulltable - * starts */ - numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; - diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; - info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - } - info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; - info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - - /* 4. 
create and initialize the lookup tables */ - info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->LayoutTable == NULL) - return (ENOMEM); - info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->OffsetTable == NULL) - return (ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); - if (info->BlockTable == NULL) - return (ENOMEM); - - first_avail_slot = rf_make_1d_array(v, NULL); - if (first_avail_slot == NULL) - return (ENOMEM); - - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) - info->LayoutTable[i][j] = *cfgBuf++; - - /* initialize offset table */ - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) { - info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; - first_avail_slot[info->LayoutTable[i][j]]++; - } - - /* initialize block table */ - for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { - for (i = 0; i < b; i++) { - for (j = 0; j < k; j++) { - info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] - [info->LayoutTable[i][j]] = SUID; - } - SUID++; - } - } - - rf_free_1d_array(first_avail_slot, v); - - /* 5. 
set up the remaining redundant-but-useful parameters */ - - raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 1); - - /* strange evaluation order below to try and minimize overflow - * problems */ - - layoutPtr->dataSectorsPerStripe = (k - 1) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = k - 1; - layoutPtr->numParityCol = 1; - - return (0); -} -/* declustering with distributed sparing */ -static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t); -static void -rf_ShutdownDeclusteredDS(arg) - RF_ThreadArg_t arg; -{ - RF_DeclusteredConfigInfo_t *info; - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - if (info->SpareTable) - rf_FreeSpareTable(raidPtr); -} - -int -rf_ConfigureDeclusteredDS( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr); - if (rc) - return (rc); - rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr); - if (rc) { - RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc); - rf_ShutdownDeclusteredDS(raidPtr); - return (rc); - } - return (0); -} - -void -rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidSector; - RF_RowCol_t *row; - RF_RowCol_t *col; - RF_SectorNum_t *diskSector; - int remap; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t 
BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - if (raidPtr->numRow == 1) - *row = 0; /* avoid a mod and a div in the common case */ - else { - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on - * this disk */ - } - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - RepIndex = info->PUsPerBlock - TableID; - if (!raidPtr->noRotate) - BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0); - *col = info->LayoutTable[BlockID][BlockOffset]; - - /* remap to distributed spare space if indicated */ - if (remap) { - RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 
1 : 0, SpareRegion, col, &outSU); - } else { - - outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ - } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within - * a PU */ - - /* convert SUs to sectors, and, if not aligned to SU boundary, add in - * offset to sector. */ - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); - - RF_ASSERT(*col != -1); -} - - -/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */ -void -rf_MapParityDeclustered( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - if (raidPtr->numRow == 1) - *row = 0; /* avoid a mod and a div in the common case */ - else { - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on - * this disk */ - } - if 
((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - /* TableOffset = FullTableOffset % info->SUsPerTable; */ - /* BlockID = (TableOffset / info->PUsPerBlock) % - * info->BlocksPerTable; */ - BlockID = TableOffset / info->PUsPerBlock; - /* BlockOffset = TableOffset % info->PUsPerBlock; */ - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID; - *col = info->LayoutTable[BlockID][RepIndex]; - - if (remap) { - RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); - } else { - - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; - } - - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); - - RF_ASSERT(*col != -1); -} -/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. 
- * the caller must _never_ attempt to modify this array. - */ -void -rf_IdentifyStripeDeclustered( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0; - RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); - RF_StripeNum_t stripeID, FullTableID; - int tableOffset; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset - * into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into - * block design table */ - *diskids = info->LayoutTable[tableOffset]; -} -/* This returns the default head-separation limit, which is measured - * in "required units for reconstruction". Each time a disk fetches - * a unit, it bumps a counter. The head-sep code prohibits any disk - * from getting more than headSepLimit counter values ahead of any - * other. - * - * We assume here that the number of floating recon buffers is already - * set. There are r stripes to be reconstructed in each table, and so - * if we have a total of B buffers, we can have at most B/r tables - * under recon at any one time. In each table, lambda units are required - * from each disk, so given B buffers, the head sep limit has to be - * (lambda*B)/r units. We subtract one to avoid weird boundary cases. - * - * for example, suppose were given 50 buffers, r=19, and lambda=4 as in - * the 20.5 design. 
There are 19 stripes/table to be reconstructed, so - * we can have 50/19 tables concurrently under reconstruction, which means - * we can allow the fastest disk to get 50/19 tables ahead of the slower - * disk. There are lambda "required units" for each disk, so the fastest - * disk can get 4*50/19 = 10 counter values ahead of the slowest. - * - * If numBufsToAccumulate is not 1, we need to limit the head sep further - * because multiple bufs will be required for each stripe under recon. - */ -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitDeclustered( - RF_Raid_t * raidPtr) -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - return (info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate); -} -/* returns the default number of recon buffers to use. The value - * is somewhat arbitrary...it's intended to be large enough to allow - * for a reasonably large head-sep limit, but small enough that you - * don't use up all your system memory with buffers. - */ -int -rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr) -{ - return (100 * rf_numBufsToAccumulate); -} -/* sectors in the last fulltable of the array need to be handled - * specially since this fulltable can be incomplete. this function - * changes the values of certain params to handle this. - * - * the idea here is that MapSector et. al. figure out which disk the - * addressed unit lives on by computing the modulos of the unit number - * with the number of units per fulltable, table, etc. In the last - * fulltable, there are fewer units per fulltable, so we need to adjust - * the number of user data units per fulltable to reflect this. 
- * - * so, we (1) convert the fulltable size and depth parameters to - * the size of the partial fulltable at the end, (2) compute the - * disk sector offset where this fulltable starts, and (3) convert - * the users stripe unit number from an offset into the array to - * an offset into the last fulltable. - */ -void -rf_decluster_adjust_params( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t * SUID, - RF_StripeCount_t * sus_per_fulltable, - RF_StripeCount_t * fulltable_depth, - RF_StripeNum_t * base_suid) -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - - if (*SUID >= info->FullTableLimitSUID) { - /* new full table size is size of last full table on disk */ - *sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable; - - /* new full table depth is corresponding depth */ - *fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - - /* set up the new base offset */ - *base_suid = info->DiskOffsetOfLastFullTableInSUs; - - /* convert users array address to an offset into the last - * fulltable */ - *SUID -= info->FullTableLimitSUID; - } -} -/* - * map a stripe ID to a parity stripe ID. - * See comment above RaidAddressToParityStripeID in layout.c. - */ -void -rf_MapSIDToPSIDDeclustered( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - RF_DeclusteredConfigInfo_t *info; - - info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - - *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) - * info->BlocksPerTable + (stripeID % info->BlocksPerTable); - *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) - / info->BlocksPerTable; - RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU); -} -/* - * Called from MapSector and MapParity to retarget an access at the spare unit. - * Modifies the "col" and "outSU" parameters only. 
- */ -void -rf_remap_to_spare_space( - RF_RaidLayout_t * layoutPtr, - RF_DeclusteredConfigInfo_t * info, - RF_RowCol_t row, - RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, - RF_SectorNum_t BlockID, - RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, - RF_RowCol_t * outCol, - RF_StripeNum_t * outSU) -{ - RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, - which_ft; - - /* - * note that FullTableID and hence SpareRegion may have gotten - * tweaked by rf_decluster_adjust_params. We detect this by - * noticing that base_suid is not 0. - */ - if (base_suid == 0) { - ftID = FullTableID; - } else { - /* - * There may be > 1.0 full tables in the last (i.e. partial) - * spare region. find out which of these we're in. - */ - lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs; - which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the actual full table ID */ - ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft; - SpareRegion = info->NumCompleteSRs; - } - TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion; - - *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk; - RF_ASSERT(*outCol != -1); - - spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ? 
- info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU : - (SpareRegion + 1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs; - *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs; - if (*outSU >= layoutPtr->stripeUnitsPerDisk) { - printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n", (long) *outSU); - } -} - -#endif /* (RF_INCLUDE_PARITY_DECLUSTERING > 0) || (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) */ - - -int -rf_InstallSpareTable( - RF_Raid_t * raidPtr, - RF_RowCol_t frow, - RF_RowCol_t fcol) -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_SparetWait_t *req; - int retcode; - - RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *)); - req->C = raidPtr->numCol; - req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; - req->fcol = fcol; - req->SUsPerPU = raidPtr->Layout.SUsPerPU; - req->TablesPerSpareRegion = info->TablesPerSpareRegion; - req->BlocksPerTable = info->BlocksPerTable; - req->TableDepthInPUs = info->TableDepthInPUs; - req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs; - - retcode = rf_GetSpareTableFromDaemon(req); - RF_ASSERT(!retcode); /* XXX -- fix this to recover gracefully -- - * XXX */ - return (retcode); -} -/* - * Invoked via ioctl to install a spare table in the kernel. 
- */ -int -rf_SetSpareTable(raidPtr, data) - RF_Raid_t *raidPtr; - void *data; -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_SpareTableEntry_t **ptrs; - int i, retcode; - - /* what we need to copyin is a 2-d array, so first copyin the user - * pointers to the rows in the table */ - RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - - if (retcode) - return (retcode); - - /* now allocate kernel space for the row pointers */ - RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - - /* now allocate kernel space for each row in the table, and copy it in - * from user space */ - for (i = 0; i < info->TablesPerSpareRegion; i++) { - RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *)); - retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); - if (retcode) { - info->SpareTable = NULL; /* blow off the memory - * we've allocated */ - return (retcode); - } - } - - /* free up the temporary array we used */ - RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - - return (0); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsDeclustered(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - - return (((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk); -} - -void -rf_FreeSpareTable(raidPtr) - RF_Raid_t *raidPtr; -{ - long i; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_SpareTableEntry_t **table = info->SpareTable; - - for (i = 0; i < info->TablesPerSpareRegion; i++) { - RF_Free(table[i], 
info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); - } - RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - info->SpareTable = (RF_SpareTableEntry_t **) NULL; -} diff --git a/sys/dev/raidframe/rf_decluster.h b/sys/dev/raidframe/rf_decluster.h deleted file mode 100644 index a630298..0000000 --- a/sys/dev/raidframe/rf_decluster.h +++ /dev/null @@ -1,141 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_decluster.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/*---------------------------------------------------------------------- - * - * decluster.h -- header file for declustered layout code - * - * Adapted from raidSim version July 1994 - * Created 10-21-92 (MCH) - * - *--------------------------------------------------------------------*/ - -#ifndef _RF__RF_DECLUSTER_H_ -#define _RF__RF_DECLUSTER_H_ - -#include <dev/raidframe/rf_types.h> - -/* - * These structures define the tables used to locate the spare unit - * associated with a particular data or parity unit, and to perform - * the associated inverse mapping. - */ -struct RF_SpareTableEntry_s { - u_int spareDisk; /* disk to which this block is spared */ - u_int spareBlockOffsetInSUs; /* offset into spare table for that - * disk */ -}; -#define RF_SPAREMAP_NAME_LEN 128 - -/* this is the layout-specific info structure for the declustered layout. - */ -struct RF_DeclusteredConfigInfo_s { - RF_StripeCount_t groupSize; /* no. of stripe units per parity - * stripe */ - RF_RowCol_t **LayoutTable; /* the block design table */ - RF_RowCol_t **OffsetTable; /* the sector offset table */ - RF_RowCol_t **BlockTable; /* the block membership table */ - RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */ - RF_StripeCount_t SUsPerTable; /* stripe units per table */ - RF_StripeCount_t PUsPerBlock; /* parity units per block */ - RF_StripeCount_t SUsPerBlock; /* stripe units per block */ - RF_StripeCount_t BlocksPerTable; /* block design tuples per - * table */ - RF_StripeCount_t NumParityReps; /* tables per full table */ - RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */ - RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1 - * fulltable */ - RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables - * start */ - RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last - * fulltable */ - RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial - * ft, if any */ - RF_StripeCount_t 
numCompleteFullTablesPerDisk; /* ft identifier of - * partial ft, if any */ - u_int Lambda; /* the pair count in the block design */ - - /* these are used only in the distributed-sparing case */ - RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising - * 1 spare region */ - RF_StripeCount_t TablesPerSpareRegion; /* # of tables */ - RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare - * space/disk/region */ - RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */ - RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space - * after partial ft */ - RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs - * per disk */ - RF_StripeCount_t NumCompleteSRs; - RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */ - char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find - * sparemap. not used in - * kernel */ -}; - -int -rf_ConfigureDeclustered(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_ConfigureDeclusteredDS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); - -void -rf_MapSectorDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -int rf_InstallSpareTable(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); -void rf_FreeSpareTable(RF_Raid_t * raidPtr); - -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t * raidPtr); -int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr); - -void 
-rf_decluster_adjust_params(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t * SUID, RF_StripeCount_t * sus_per_fulltable, - RF_StripeCount_t * fulltable_depth, RF_StripeNum_t * base_suid); -void -rf_remap_to_spare_space( - RF_RaidLayout_t * layoutPtr, - RF_DeclusteredConfigInfo_t * info, RF_RowCol_t row, RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, RF_RowCol_t * outCol, RF_StripeNum_t * outSU); -int rf_SetSpareTable(RF_Raid_t * raidPtr, void *data); -RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t * raidPtr); - -#endif /* !_RF__RF_DECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_declusterPQ.c b/sys/dev/raidframe/rf_declusterPQ.c deleted file mode 100644 index dc539a3..0000000 --- a/sys/dev/raidframe/rf_declusterPQ.c +++ /dev/null @@ -1,493 +0,0 @@ -/* $NetBSD: rf_declusterPQ.c,v 1.5 2001/01/26 14:06:17 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Daniel Stodolsky, Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*-------------------------------------------------- - * rf_declusterPQ.c - * - * mapping code for declustered P & Q or declustered EvenOdd - * much code borrowed from rf_decluster.c - * - *--------------------------------------------------*/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_declusterPQ.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> - -#if (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) || (RF_INCLUDE_EVENODD > 0) -/* configuration code */ - -int -rf_ConfigureDeclusteredPQ( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j, l; - int *first_avail_slot; - int complete_FT_count, SUID; - RF_DeclusteredConfigInfo_t *info; - int numCompleteFullTablesPerDisk; - int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0, - extraPUsPerDisk; - int totSparePUsPerDisk; - int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); - - cfgBuf += RF_SPAREMAP_NAME_LEN; - - b = *((int *) cfgBuf); - cfgBuf += sizeof(int); - v = *((int *) cfgBuf); - cfgBuf += sizeof(int); - k = *((int *) cfgBuf); - cfgBuf += sizeof(int); - r = *((int *) cfgBuf); - cfgBuf += sizeof(int); - lambda = *((int *) cfgBuf); - cfgBuf += sizeof(int); - 
raidPtr->noRotate = *((int *) cfgBuf); - cfgBuf += sizeof(int); - - if (k <= 2) { - printf("RAIDFRAME: k=%d, minimum value 2\n", k); - return (EINVAL); - } - /* 1. create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* the sparemaps are generated assuming that parity is rotated, so we - * issue a warning if both distributed sparing and no-rotate are on at - * the same time */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); - } - if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); - return (EINVAL); - } - /* 3. set up the values used in devRaidMap */ - info->BlocksPerTable = b; - info->NumParityReps = info->groupSize = k; - info->PUsPerBlock = k - 2; /* PQ */ - info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ - info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ - info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; - info->TableDepthInPUs = (b * k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ - - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ - info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; - - /* check to make sure the block design is sufficiently small */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: 
config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", - (int) info->FullTableDepthInPUs, - (int) info->SpareSpaceDepthPerRegionInSUs, - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", - (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } - - - /* compute the size of each disk, and the number of tables in the last - * fulltable (which need not be complete) */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; - - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; - info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; - extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - - /* assume conservatively that we need the full amount of spare - * space in one region in order to provide spares for the - * partial spare region at the end of the array. We set "i" - * to the number of tables in the partial spare region. This - * may actually include some fulltables. 
*/ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - if (extraPUsPerDisk <= 0) - i = 0; - else - i = extraPUsPerDisk / info->TableDepthInPUs; - - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = i % k; - - /* note that in the last spare region, the spare space is - * complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - info->TotSparePUsPerDisk = totSparePUsPerDisk; - - layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ - info->ExtraTablesPerDisk * info->TableDepthInPUs + - totSparePUsPerDisk /* spare space */ - ) * layoutPtr->SUsPerPU; - layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) - * layoutPtr->SUsPerPU * (k - 1) / k; - - } else { - /* non-dist spare case: force each disk to contain an - * integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the number of tables in the last fulltable, which - * need not be complete */ - complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; - - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; - } - - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - /* find the disk offset of the stripe unit where the last fulltable - * starts */ - 
numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; - diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; - info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - } - info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; - info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - - /* 4. create and initialize the lookup tables */ - info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->LayoutTable == NULL) - return (ENOMEM); - info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->OffsetTable == NULL) - return (ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); - if (info->BlockTable == NULL) - return (ENOMEM); - - first_avail_slot = (int *) rf_make_1d_array(v, NULL); - if (first_avail_slot == NULL) - return (ENOMEM); - - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) - info->LayoutTable[i][j] = *cfgBuf++; - - /* initialize offset table */ - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) { - info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; - first_avail_slot[info->LayoutTable[i][j]]++; - } - - /* initialize block table */ - for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { - for (i = 0; i < b; i++) { - for (j = 0; j < k; j++) { - info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] - [info->LayoutTable[i][j]] = SUID; - } - SUID++; - } - } - - rf_free_1d_array(first_avail_slot, v); - - /* 5. 
set up the remaining redundant-but-useful parameters */ - - raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 2); - - /* strange evaluation order below to try and minimize overflow - * problems */ - - layoutPtr->dataSectorsPerStripe = (k - 2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = k - 2; - layoutPtr->numParityCol = 2; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr) -{ - int def_decl; - - def_decl = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr); - return (RF_MAX(3 * raidPtr->numCol, def_decl)); -} - -void -rf_MapSectorDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / 
info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - RF_ASSERT(BlockOffset < info->groupSize - 2); - /* - TableIDs go from 0 .. GroupSize-1 inclusive. - PUsPerBlock is k-2. - We want the tableIDs to rotate from the - right, so use GroupSize - */ - RepIndex = info->groupSize - 1 - TableID; - RF_ASSERT(RepIndex >= 0); - if (!raidPtr->noRotate) { - if (TableID == 0) - BlockOffset++; /* P on last drive, Q on first */ - else - BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0); /* skip over PQ */ - RF_ASSERT(BlockOffset < info->groupSize); - *col = info->LayoutTable[BlockID][BlockOffset]; - } - /* remap to distributed spare space if indicated */ - if (remap) { - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 
1 : 0, SpareRegion, col, &outSU); - } else { - - outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ - } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within - * a PU */ - - /* convert SUs to sectors, and, if not aligned to SU boundary, add in - * offset to sector */ - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); -} - - -void -rf_MapParityDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - /* compute BlockID and RepIndex exactly as before */ - 
FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; - *col = info->LayoutTable[BlockID][RepIndex]; - - if (remap) - RF_PANIC(); - - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); -} - -void -rf_MapQDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex, RepIndexQ; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - *row = 
FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the q block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; - RepIndexQ = ((RepIndex == (info->groupSize - 1)) ? 0 : RepIndex + 1); - *col = info->LayoutTable[BlockID][RepIndexQ]; - - if (remap) - RF_PANIC(); - - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - - outSU += info->OffsetTable[BlockID][RepIndexQ] * layoutPtr->SUsPerPU; - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); -} -/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. - * the caller must _never_ attempt to modify this array. 
- */ -void -rf_IdentifyStripeDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0; - RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); - RF_StripeNum_t stripeID, FullTableID; - int tableOffset; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset - * into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into - * block design table */ - *diskids = info->LayoutTable[tableOffset]; -} -#endif /* (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) || (RF_INCLUDE_EVENODD > 0) */ diff --git a/sys/dev/raidframe/rf_declusterPQ.h b/sys/dev/raidframe/rf_declusterPQ.h deleted file mode 100644 index 6edef0b..0000000 --- a/sys/dev/raidframe/rf_declusterPQ.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_declusterPQ.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky, Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DECLUSTERPQ_H_ -#define _RF__RF_DECLUSTERPQ_H_ - -#include <dev/raidframe/rf_types.h> - -int -rf_ConfigureDeclusteredPQ(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr); -void -rf_MapSectorDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapQDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); - -#endif /* !_RF__RF_DECLUSTERPQ_H_ */ diff --git a/sys/dev/raidframe/rf_desc.h b/sys/dev/raidframe/rf_desc.h deleted file mode 100644 index 8a6951b..0000000 --- a/sys/dev/raidframe/rf_desc.h +++ /dev/null @@ -1,113 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_desc.h,v 1.5 2000/01/09 00:00:18 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DESC_H_ -#define _RF__RF_DESC_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_dag.h> - -struct RF_RaidReconDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_RowCol_t row; /* row of failed disk */ - RF_RowCol_t col; /* col of failed disk */ - int state; /* how far along the reconstruction operation - * has gotten */ - RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon - * (not used in dist sparing) */ - int numDisksDone; /* the number of surviving disks that have - * completed their work */ - RF_RowCol_t srow; /* row ID of the spare disk (not used in dist - * sparing) */ - RF_RowCol_t scol; /* col ID of the spare disk (not used in dist - * sparing) */ - /* - * Prevent recon from hogging CPU - */ - RF_Etimer_t recon_exec_timer; - RF_uint64 reconExecTimerRunning; - RF_uint64 reconExecTicks; - RF_uint64 maxReconExecTicks; - -#if RF_RECON_STATS > 0 - RF_uint64 hsStallCount; /* head sep stall 
count */ - RF_uint64 numReconExecDelays; - RF_uint64 numReconEventWaits; -#endif /* RF_RECON_STATS > 0 */ - RF_RaidReconDesc_t *next; -}; - -struct RF_RaidAccessDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_IoType_t type; /* read or write */ - RF_RaidAddr_t raidAddress; /* starting address in raid address - * space */ - RF_SectorCount_t numBlocks; /* number of blocks (sectors) to - * transfer */ - RF_StripeCount_t numStripes; /* number of stripes involved in - * access */ - caddr_t bufPtr; /* pointer to data buffer */ - RF_RaidAccessFlags_t flags; /* flags controlling operation */ - int state; /* index into states telling how far along the - * RAID operation has gotten */ - RF_AccessState_t *states; /* array of states to be run */ - int status; /* pass/fail status of the last operation */ - RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */ - RF_AccessStripeMapHeader_t *asmap; /* the asm for this I/O */ - void *bp; /* buf pointer for this RAID acc. ignored - * outside the kernel */ - RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to - * the caller after I/O completion */ - RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be - * returned to the caller - * after I/O completion */ - RF_AccTraceEntry_t tracerec; /* perf monitoring information for a - * user access (not for dag stats) */ - void (*callbackFunc) (RF_CBParam_t); /* callback function for this - * I/O */ - void *callbackArg; /* arg to give to callback func */ - - RF_AllocListElem_t *cleanupList; /* memory to be freed at the - * end of the access */ - - RF_RaidAccessDesc_t *next; - RF_RaidAccessDesc_t *head; - - int numPending; - - RF_DECLARE_MUTEX(mutex) /* these are used to implement - * blocking I/O */ - RF_DECLARE_COND(cond) - int async_flag; - - RF_Etimer_t timer; /* used for timing this access */ -}; -#endif /* !_RF__RF_DESC_H_ */ diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c deleted file mode 100644 index 
c03e6cd..0000000 --- a/sys/dev/raidframe/rf_diskqueue.c +++ /dev/null @@ -1,593 +0,0 @@ -/* $NetBSD: rf_diskqueue.c,v 1.13 2000/03/04 04:22:34 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * - * rf_diskqueue.c -- higher-level disk queue code - * - * the routines here are a generic wrapper around the actual queueing - * routines. The code here implements thread scheduling, synchronization, - * and locking ops (see below) on top of the lower-level queueing code. - * - * to support atomic RMW, we implement "locking operations". When a - * locking op is dispatched to the lower levels of the driver, the - * queue is locked, and no further I/Os are dispatched until the queue - * receives & completes a corresponding "unlocking operation". 
This - * code relies on the higher layers to guarantee that a locking op - * will always be eventually followed by an unlocking op. The model - * is that the higher layers are structured so locking and unlocking - * ops occur in pairs, i.e. an unlocking op cannot be generated until - * after a locking op reports completion. There is no good way to - * check to see that an unlocking op "corresponds" to the op that - * currently has the queue locked, so we make no such attempt. Since - * by definition there can be only one locking op outstanding on a - * disk, this should not be a problem. - * - * In the kernel, we allow multiple I/Os to be concurrently dispatched - * to the disk driver. In order to support locking ops in this - * environment, when we decide to do a locking op, we stop dispatching - * new I/Os and wait until all dispatched I/Os have completed before - * dispatching the locking op. - * - * Unfortunately, the code is different in the 3 different operating - * states (user level, kernel, simulator). In the kernel, I/O is - * non-blocking, and we have no disk threads to dispatch for us. - * Therefore, we have to dispatch new I/Os to the scsi driver at the - * time of enqueue, and also at the time of completion. At user - * level, I/O is blocking, and so only the disk threads may dispatch - * I/Os. Thus at user level, all we can do at enqueue time is enqueue - * and wake up the disk thread to do the dispatch. 
- * - ****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_cvscan.h> -#include <dev/raidframe/rf_sstf.h> -#include <dev/raidframe/rf_fifo.h> -#include <dev/raidframe/rf_kintf.h> - -static int init_dqd(RF_DiskQueueData_t *); -static void clean_dqd(RF_DiskQueueData_t *); -static void rf_ShutdownDiskQueueSystem(void *); - -#define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) - -/***************************************************************************** - * - * the disk queue switch defines all the functions used in the - * different queueing disciplines queue ID, init routine, enqueue - * routine, dequeue routine - * - ****************************************************************************/ - -static RF_DiskQueueSW_t diskqueuesw[] = { - {"fifo", /* FIFO */ - rf_FifoCreate, - rf_FifoEnqueue, - rf_FifoDequeue, - rf_FifoPeek, - rf_FifoPromote}, - - {"cvscan", /* cvscan */ - rf_CvscanCreate, - rf_CvscanEnqueue, - rf_CvscanDequeue, - rf_CvscanPeek, - rf_CvscanPromote}, - - {"sstf", /* shortest seek time first */ - rf_SstfCreate, - rf_SstfEnqueue, - 
rf_SstfDequeue, - rf_SstfPeek, - rf_SstfPromote}, - - {"scan", /* SCAN (two-way elevator) */ - rf_ScanCreate, - rf_SstfEnqueue, - rf_ScanDequeue, - rf_ScanPeek, - rf_SstfPromote}, - - {"cscan", /* CSCAN (one-way elevator) */ - rf_CscanCreate, - rf_SstfEnqueue, - rf_CscanDequeue, - rf_CscanPeek, - rf_SstfPromote}, - -}; -#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) - -static RF_FreeList_t *rf_dqd_freelist; - -#define RF_MAX_FREE_DQD 256 -#define RF_DQD_INC 16 -#define RF_DQD_INITIAL 64 - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif - -#include <sys/buf.h> - -static int -init_dqd(dqd) - RF_DiskQueueData_t *dqd; -{ - - dqd->bp = (RF_Buf_t) malloc(sizeof(*dqd->bp), M_RAIDFRAME, M_NOWAIT); - if (dqd->bp == NULL) { - return (ENOMEM); - } - memset(dqd->bp, 0, sizeof(*dqd->bp)); /* if you don't do it, nobody - * else will.. */ - return (0); -} - -static void -clean_dqd(dqd) - RF_DiskQueueData_t *dqd; -{ - free(dqd->bp, M_RAIDFRAME); -} -/* configures a single disk queue */ - -int -rf_ConfigureDiskQueue( - RF_Raid_t * raidPtr, - RF_DiskQueue_t * diskqueue, - RF_RowCol_t r, /* row & col -- debug only. BZZT not any - * more... 
*/ - RF_RowCol_t c, - RF_DiskQueueSW_t * p, - RF_SectorCount_t sectPerDisk, - dev_t dev, - int maxOutstanding, - RF_ShutdownList_t ** listp, - RF_AllocListElem_t * clList) -{ - int rc; - - diskqueue->row = r; - diskqueue->col = c; - diskqueue->qPtr = p; - diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp); - diskqueue->dev = dev; - diskqueue->numOutstanding = 0; - diskqueue->queueLength = 0; - diskqueue->maxOutstanding = maxOutstanding; - diskqueue->curPriority = RF_IO_NORMAL_PRIORITY; - diskqueue->nextLockingOp = NULL; - diskqueue->unlockingOp = NULL; - diskqueue->numWaiting = 0; - diskqueue->flags = 0; - diskqueue->raidPtr = raidPtr; - diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c]; - rc = rf_create_managed_mutex(listp, &diskqueue->mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rc = rf_create_managed_cond(listp, &diskqueue->cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - return (0); -} - -static void -rf_ShutdownDiskQueueSystem(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist, next, (RF_DiskQueueData_t *), clean_dqd); -} - -int -rf_ConfigureDiskQueueSystem(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD, - RF_DQD_INC, sizeof(RF_DiskQueueData_t)); - if (rf_dqd_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownDiskQueueSystem(NULL); - return (rc); - } - RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL, next, - (RF_DiskQueueData_t *), init_dqd); - return (0); -} - -int -rf_ConfigureDiskQueues( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_DiskQueue_t **diskQueues, *spareQueues; - 
RF_DiskQueueSW_t *p; - RF_RowCol_t r, c; - int rc, i; - - raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs; - - for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) { - if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) { - p = &diskqueuesw[i]; - break; - } - } - if (p == NULL) { - RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType); - p = &diskqueuesw[0]; - } - raidPtr->qType = p; - RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList); - if (diskQueues == NULL) { - return (ENOMEM); - } - raidPtr->Queues = diskQueues; - for (r = 0; r < raidPtr->numRow; r++) { - RF_CallocAndAdd(diskQueues[r], raidPtr->numCol + - ((r == 0) ? RF_MAXSPARE : 0), - sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), - raidPtr->cleanupList); - if (diskQueues[r] == NULL) - return (ENOMEM); - for (c = 0; c < raidPtr->numCol; c++) { - rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[r][c], - r, c, p, - raidPtr->sectorsPerDisk, - raidPtr->Disks[r][c].dev, - cfgPtr->maxOutstandingDiskReqs, - listp, raidPtr->cleanupList); - if (rc) - return (rc); - } - } - - spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; - for (r = 0; r < raidPtr->numSpare; r++) { - rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r], - 0, raidPtr->numCol + r, p, - raidPtr->sectorsPerDisk, - raidPtr->Disks[0][raidPtr->numCol + r].dev, - cfgPtr->maxOutstandingDiskReqs, listp, - raidPtr->cleanupList); - if (rc) - return (rc); - } - return (0); -} -/* Enqueue a disk I/O - * - * Unfortunately, we have to do things differently in the different - * environments (simulator, user-level, kernel). - * At user level, all I/O is blocking, so we have 1 or more threads/disk - * and the thread that enqueues is different from the thread that dequeues. - * In the kernel, I/O is non-blocking and so we'd like to have multiple - * I/Os outstanding on the physical disks when possible. 
- * - * when any request arrives at a queue, we have two choices: - * dispatch it to the lower levels - * queue it up - * - * kernel rules for when to do what: - * locking request: queue empty => dispatch and lock queue, - * else queue it - * unlocking req : always dispatch it - * normal req : queue empty => dispatch it & set priority - * queue not full & priority is ok => dispatch it - * else queue it - * - * user-level rules: - * always enqueue. In the special case of an unlocking op, enqueue - * in a special way that will cause the unlocking op to be the next - * thing dequeued. - * - * simulator rules: - * Do the same as at user level, with the sleeps and wakeups suppressed. - */ -void -rf_DiskIOEnqueue(queue, req, pri) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int pri; -{ - RF_ETIMER_START(req->qtime); - RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector); - req->priority = pri; - - if (rf_queueDebug && (req->numSector == 0)) { - printf("Warning: Enqueueing zero-sector access\n"); - } - /* - * kernel - */ - RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); - /* locking request */ - if (RF_LOCKING_REQ(req)) { - if (RF_QUEUE_EMPTY(queue)) { - Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n", pri, queue->row, queue->col); - RF_LOCK_QUEUE(queue); - rf_DispatchKernelIO(queue, req); - } else { - queue->queueLength++; /* increment count of number - * of requests waiting in this - * queue */ - Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n", pri, queue->row, queue->col); - req->queue = (void *) queue; - (queue->qPtr->Enqueue) (queue->qHdr, req, pri); - } - } - /* unlocking request */ - else - if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock - * when this I/O completes */ - Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n", pri, queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue)); - rf_DispatchKernelIO(queue, req); - } - /* normal request */ - else - if (RF_OK_TO_DISPATCH(queue, req)) 
{ - Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n", pri, queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - } else { - queue->queueLength++; /* increment count of - * number of requests - * waiting in this queue */ - Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n", pri, queue->row, queue->col); - req->queue = (void *) queue; - (queue->qPtr->Enqueue) (queue->qHdr, req, pri); - } - RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); -} - - -/* get the next set of I/Os started, kernel version only */ -void -rf_DiskIOComplete(queue, req, status) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int status; -{ - int done = 0; - - RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); - - /* unlock the queue: (1) after an unlocking req completes (2) after a - * locking req fails */ - if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) { - Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL)); - RF_UNLOCK_QUEUE(queue); - } - queue->numOutstanding--; - RF_ASSERT(queue->numOutstanding >= 0); - - /* dispatch requests to the disk until we find one that we can't. 
*/ - /* no reason to continue once we've filled up the queue */ - /* no reason to even start if the queue is locked */ - - while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) { - if (queue->nextLockingOp) { - req = queue->nextLockingOp; - queue->nextLockingOp = NULL; - Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n", req->priority, queue->row, queue->col); - } else { - req = (queue->qPtr->Dequeue) (queue->qHdr); - if (req != NULL) { - Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n", req->priority, queue->row, queue->col); - } else { - Dprintf1("DiskIOComplete: no more requests to extract.\n", ""); - } - } - if (req) { - queue->queueLength--; /* decrement count of number - * of requests waiting in this - * queue */ - RF_ASSERT(queue->queueLength >= 0); - } - if (!req) - done = 1; - else - if (RF_LOCKING_REQ(req)) { - if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */ - Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n", req->priority, queue->row, queue->col); - RF_LOCK_QUEUE(queue); - rf_DispatchKernelIO(queue, req); - done = 1; - } else { /* put it aside to wait for - * the queue to drain */ - Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n", req->priority, queue->row, queue->col); - RF_ASSERT(queue->nextLockingOp == NULL); - queue->nextLockingOp = req; - done = 1; - } - } else - if (RF_UNLOCKING_REQ(req)) { /* should not happen: - * unlocking ops should - * not get queued */ - RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for - * the future */ - Dprintf3("DiskIOComplete: dispatching pri %d unl req to r %d c %d (SHOULD NOT SEE THIS)\n", req->priority, queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - done = 1; - } else - if (RF_OK_TO_DISPATCH(queue, req)) { - Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n", req->priority, queue->row, queue->col); - rf_DispatchKernelIO(queue, req); 
- } else { /* we can't dispatch it, - * so just re-enqueue - * it. */ - /* potential trouble here if - * disk queues batch reqs */ - Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n", req->priority, queue->row, queue->col); - queue->queueLength++; - (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority); - done = 1; - } - } - - RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); -} -/* promotes accesses tagged with the given parityStripeID from low priority - * to normal priority. This promotion is optional, meaning that a queue - * need not implement it. If there is no promotion routine associated with - * a queue, this routine does nothing and returns -1. - */ -int -rf_DiskIOPromote(queue, parityStripeID, which_ru) - RF_DiskQueue_t *queue; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; -{ - int retval; - - if (!queue->qPtr->Promote) - return (-1); - RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); - retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru); - RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); - return (retval); -} - -RF_DiskQueueData_t * -rf_CreateDiskQueueData( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, - RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) -{ - RF_DiskQueueData_t *p; - - RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); - - p->sectorOffset = ssect + rf_protectedSectors; - p->numSector = nsect; - p->type = typ; - p->buf = buf; - p->parityStripeID = parityStripeID; - p->which_ru = which_ru; - p->CompleteFunc = wakeF; - p->argument = arg; - p->next = next; - p->tracerec = tracerec; - p->priority = RF_IO_NORMAL_PRIORITY; - p->AuxFunc = NULL; - p->buf2 = NULL; - p->raidPtr = raidPtr; - p->flags = flags; - p->b_proc = kb_proc; - return (p); 
-} - -RF_DiskQueueData_t * -rf_CreateDiskQueueDataFull( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, - RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - int priority, - int (*AuxFunc) (void *,...), - caddr_t buf2, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) -{ - RF_DiskQueueData_t *p; - - RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); - - p->sectorOffset = ssect + rf_protectedSectors; - p->numSector = nsect; - p->type = typ; - p->buf = buf; - p->parityStripeID = parityStripeID; - p->which_ru = which_ru; - p->CompleteFunc = wakeF; - p->argument = arg; - p->next = next; - p->tracerec = tracerec; - p->priority = priority; - p->AuxFunc = AuxFunc; - p->buf2 = buf2; - p->raidPtr = raidPtr; - p->flags = flags; - p->b_proc = kb_proc; - return (p); -} - -void -rf_FreeDiskQueueData(p) - RF_DiskQueueData_t *p; -{ - RF_FREELIST_FREE_CLEAN(rf_dqd_freelist, p, next, clean_dqd); -} diff --git a/sys/dev/raidframe/rf_diskqueue.h b/sys/dev/raidframe/rf_diskqueue.h deleted file mode 100644 index 7b162b0..0000000 --- a/sys/dev/raidframe/rf_diskqueue.h +++ /dev/null @@ -1,208 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_diskqueue.h,v 1.5 2000/02/13 04:53:57 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * rf_diskqueue.h -- header file for disk queues - * - * see comments in rf_diskqueue.c - * - ****************************************************************************************/ - - -#ifndef _RF__RF_DISKQUEUE_H_ -#define _RF__RF_DISKQUEUE_H_ - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_etimer.h> - -#include <dev/raidframe/rf_bsd.h> - -#define RF_IO_NORMAL_PRIORITY 1 -#define RF_IO_LOW_PRIORITY 0 - -/* the data held by a disk queue entry */ -struct RF_DiskQueueData_s { - RF_SectorNum_t sectorOffset; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors to read/write */ - RF_IoType_t type; /* read/write/nop */ - caddr_t buf; /* buffer pointer */ - RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this - * access is for */ - RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */ - int priority; /* the priority of this request */ - int (*CompleteFunc) (void *, int); /* function to be called upon - * completion */ - int (*AuxFunc) (void *,...); /* function called upon - * completion of the first I/O - * of a Read_Op_Write pair */ - void *argument; /* argument to be passed to CompleteFunc */ - RF_Raid_t *raidPtr; /* needed for simulation */ - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - 
RF_Etimer_t qtime; /* perf mon only - time request is in queue */ - long entryTime; - RF_DiskQueueData_t *next; - RF_DiskQueueData_t *prev; - caddr_t buf2; /* for read-op-write */ - dev_t dev; /* the device number for in-kernel version */ - RF_DiskQueue_t *queue; /* the disk queue to which this req is - * targeted */ - RF_DiskQueueDataFlags_t flags; /* flags controlling operation */ - - struct proc *b_proc; /* the b_proc from the original bp passed into - * the driver for this I/O */ - /* XXX Should this be changed to the opaque - * RF_Thread_t ? */ - RF_Buf_t bp; /* a bp to use to get this I/O done */ -}; -#define RF_LOCK_DISK_QUEUE 0x01 -#define RF_UNLOCK_DISK_QUEUE 0x02 - -/* note: "Create" returns type-specific queue header pointer cast to (void *) */ -struct RF_DiskQueueSW_s { - RF_DiskQueueType_t queueType; - void *(*Create) (RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine -- - * one call per queue in - * system */ - void (*Enqueue) (void *, RF_DiskQueueData_t *, int); /* enqueue routine */ - RF_DiskQueueData_t *(*Dequeue) (void *); /* dequeue routine */ - RF_DiskQueueData_t *(*Peek) (void *); /* peek at head of queue */ - - /* the rest are optional: they improve performance, but the driver - * will deal with it if they don't exist */ - int (*Promote) (void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of - * tagged accesses */ -}; - -struct RF_DiskQueue_s { - RF_DiskQueueSW_t *qPtr; /* access point to queue functions */ - void *qHdr; /* queue header, of whatever type */ - RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */ - RF_DECLARE_COND(cond) /* condition variable for - * synchronization */ - long numOutstanding; /* number of I/Os currently outstanding on - * disk */ - long maxOutstanding; /* max # of I/Os that can be outstanding on a - * disk (in-kernel only) */ - int curPriority; /* the priority of accs all that are currently - * outstanding */ - long queueLength; /* number of requests in queue 
*/ - RF_DiskQueueData_t *nextLockingOp; /* a locking op that has - * arrived at the head of the - * queue & is waiting for - * drainage */ - RF_DiskQueueData_t *unlockingOp; /* used at user level to - * communicate unlocking op - * b/w user (or dag exec) & - * disk threads */ - int numWaiting; /* number of threads waiting on this variable. - * user-level only */ - RF_DiskQueueFlags_t flags; /* terminate, locked */ - RF_Raid_t *raidPtr; /* associated array */ - dev_t dev; /* device number for kernel version */ - RF_SectorNum_t last_deq_sector; /* last sector number dequeued or - * dispatched */ - int row, col; /* debug only */ - struct raidcinfo *rf_cinfo; /* disks component info.. */ -}; -#define RF_DQ_LOCKED 0x02 /* no new accs allowed until queue is - * explicitly unlocked */ - -/* macros setting & returning information about queues and requests */ -#define RF_QUEUE_LOCKED(_q) ((_q)->flags & RF_DQ_LOCKED) -#define RF_QUEUE_EMPTY(_q) (((_q)->numOutstanding == 0) && ((_q)->nextLockingOp == NULL) && !RF_QUEUE_LOCKED(_q)) -#define RF_QUEUE_FULL(_q) ((_q)->numOutstanding == (_q)->maxOutstanding) - -#define RF_LOCK_QUEUE(_q) (_q)->flags |= RF_DQ_LOCKED -#define RF_UNLOCK_QUEUE(_q) (_q)->flags &= ~RF_DQ_LOCKED - -#define RF_LOCK_QUEUE_MUTEX(_q_,_wh_) RF_LOCK_MUTEX((_q_)->mutex) -#define RF_UNLOCK_QUEUE_MUTEX(_q_,_wh_) RF_UNLOCK_MUTEX((_q_)->mutex) - -#define RF_LOCKING_REQ(_r) ((_r)->flags & RF_LOCK_DISK_QUEUE) -#define RF_UNLOCKING_REQ(_r) ((_r)->flags & RF_UNLOCK_DISK_QUEUE) - -/* whether it is ok to dispatch a regular request */ -#define RF_OK_TO_DISPATCH(_q_,_r_) \ - (RF_QUEUE_EMPTY(_q_) || \ - (!RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority))) - -int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t ** listp); - -void rf_TerminateDiskQueues(RF_Raid_t * raidPtr); - -int -rf_ConfigureDiskQueues(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); - -void rf_DiskIOEnqueue(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int pri); - - 
-void rf_DiskIOComplete(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int status); - -int -rf_DiskIOPromote(RF_DiskQueue_t * queue, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -RF_DiskQueueData_t * -rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, - RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - void *raidPtr, RF_DiskQueueDataFlags_t flags, - void *kb_proc); - -RF_DiskQueueData_t * -rf_CreateDiskQueueDataFull(RF_IoType_t typ, RF_SectorNum_t ssect, - RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - int priority, int (*AuxFunc) (void *,...), - caddr_t buf2, void *raidPtr, - RF_DiskQueueDataFlags_t flags, void *kb_proc); - -void -rf_FreeDiskQueueData(RF_DiskQueueData_t * p); - -int -rf_ConfigureDiskQueue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t, - RF_RowCol_t, RF_DiskQueueSW_t *, - RF_SectorCount_t, dev_t, int, - RF_ShutdownList_t **, - RF_AllocListElem_t *); -#endif /* !_RF__RF_DISKQUEUE_H_ */ diff --git a/sys/dev/raidframe/rf_disks.c b/sys/dev/raidframe/rf_disks.c deleted file mode 100644 index 14f72c2..0000000 --- a/sys/dev/raidframe/rf_disks.c +++ /dev/null @@ -1,1140 +0,0 @@ -/* $NetBSD: rf_disks.c,v 1.34 2000/12/05 01:35:56 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/*- - * Copyright (c) 1999 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************************************** - * rf_disks.c -- code to perform operations on the actual disks - ***************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_kintf.h> -#include <dev/raidframe/rf_bsd.h> - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - -static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); -static void rf_print_label_status( RF_Raid_t *, int, int, char *, - RF_ComponentLabel_t *); -static int rf_check_label_vitals( RF_Raid_t *, int, int, 
char *, - RF_ComponentLabel_t *, int, int ); - -#define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) -#define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) - -/************************************************************************** - * - * initialize the disks comprising the array - * - * We want the spare disks to have regular row,col numbers so that we can - * easily substitue a spare for a failed disk. But, the driver code assumes - * throughout that the array contains numRow by numCol _non-spare_ disks, so - * it's not clear how to fit in the spares. This is an unfortunate holdover - * from raidSim. The quick and dirty fix is to make row zero bigger than the - * rest, and put all the spares in it. This probably needs to get changed - * eventually. - * - **************************************************************************/ - -int -rf_ConfigureDisks( listp, raidPtr, cfgPtr ) - RF_ShutdownList_t **listp; - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - RF_RaidDisk_t **disks; - RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; - RF_RowCol_t r, c; - int bs, ret; - unsigned i, count, foundone = 0, numFailuresThisRow; - int force; - - force = cfgPtr->force; - - ret = rf_AllocDiskStructures(raidPtr, cfgPtr); - if (ret) - goto fail; - - disks = raidPtr->Disks; - - for (r = 0; r < raidPtr->numRow; r++) { - numFailuresThisRow = 0; - for (c = 0; c < raidPtr->numCol; c++) { - ret = rf_ConfigureDisk(raidPtr, - &cfgPtr->devnames[r][c][0], - &disks[r][c], r, c); - - if (ret) - goto fail; - - if (disks[r][c].status == rf_ds_optimal) { - raidread_component_label( - raidPtr->raid_cinfo[r][c].ci_dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &raidPtr->raid_cinfo[r][c].ci_label); - } - - if (disks[r][c].status != rf_ds_optimal) { - numFailuresThisRow++; - } else { - if (disks[r][c].numBlocks < min_numblks) - min_numblks = disks[r][c].numBlocks; - DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", - 
r, c, disks[r][c].devname, - (long int) disks[r][c].numBlocks, - disks[r][c].blockSize, - (long int) disks[r][c].numBlocks * - disks[r][c].blockSize / 1024 / 1024); - } - } - /* XXX fix for n-fault tolerant */ - /* XXX this should probably check to see how many failures - we can handle for this configuration! */ - if (numFailuresThisRow > 0) - raidPtr->status[r] = rf_rs_degraded; - } - - /* all disks must be the same size & have the same block size, bs must - * be a power of 2 */ - bs = 0; - for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { - for (c = 0; !foundone && c < raidPtr->numCol; c++) { - if (disks[r][c].status == rf_ds_optimal) { - bs = disks[r][c].blockSize; - foundone = 1; - } - } - } - if (!foundone) { - RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); - ret = EINVAL; - goto fail; - } - for (count = 0, i = 1; i; i <<= 1) - if (bs & i) - count++; - if (count != 1) { - RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); - ret = EINVAL; - goto fail; - } - - if (rf_CheckLabels( raidPtr, cfgPtr )) { - printf("raid%d: There were fatal errors\n", raidPtr->raidid); - if (force != 0) { - printf("raid%d: Fatal errors being ignored.\n", - raidPtr->raidid); - } else { - ret = EINVAL; - goto fail; - } - } - - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - if (disks[r][c].status == rf_ds_optimal) { - if (disks[r][c].blockSize != bs) { - RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); - ret = EINVAL; - goto fail; - } - if (disks[r][c].numBlocks != min_numblks) { - RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", - r, c, (int) min_numblks); - disks[r][c].numBlocks = min_numblks; - } - } - } - } - - raidPtr->sectorsPerDisk = min_numblks; - raidPtr->logBytesPerSector = ffs(bs) - 1; - raidPtr->bytesPerSector = bs; - raidPtr->sectorMask = bs - 1; - return (0); - -fail: - - rf_UnconfigureVnodes( raidPtr ); - - 
return (ret); -} - - -/**************************************************************************** - * set up the data structures describing the spare disks in the array - * recall from the above comment that the spare disk descriptors are stored - * in row zero, which is specially expanded to hold them. - ****************************************************************************/ -int -rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) - RF_ShutdownList_t ** listp; - RF_Raid_t * raidPtr; - RF_Config_t * cfgPtr; -{ - int i, ret; - unsigned int bs; - RF_RaidDisk_t *disks; - int num_spares_done; - - num_spares_done = 0; - - /* The space for the spares should have already been allocated by - * ConfigureDisks() */ - - disks = &raidPtr->Disks[0][raidPtr->numCol]; - for (i = 0; i < raidPtr->numSpare; i++) { - ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], - &disks[i], 0, raidPtr->numCol + i); - if (ret) - goto fail; - if (disks[i].status != rf_ds_optimal) { - RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", - &cfgPtr->spare_names[i][0]); - } else { - disks[i].status = rf_ds_spare; /* change status to - * spare */ - DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, - disks[i].devname, - (long int) disks[i].numBlocks, disks[i].blockSize, - (long int) disks[i].numBlocks * - disks[i].blockSize / 1024 / 1024); - } - num_spares_done++; - } - - /* check sizes and block sizes on spare disks */ - bs = 1 << raidPtr->logBytesPerSector; - for (i = 0; i < raidPtr->numSpare; i++) { - if (disks[i].blockSize != bs) { - RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); - ret = EINVAL; - goto fail; - } - if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { - RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", - disks[i].devname, disks[i].blockSize, - (long int) raidPtr->sectorsPerDisk); - ret = EINVAL; - goto fail; - } else 
- if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { - RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); - - disks[i].numBlocks = raidPtr->sectorsPerDisk; - } - } - - return (0); - -fail: - - /* Release the hold on the main components. We've failed to allocate - * a spare, and since we're failing, we need to free things.. - - XXX failing to allocate a spare is *not* that big of a deal... - We *can* survive without it, if need be, esp. if we get hot - adding working. - - If we don't fail out here, then we need a way to remove this spare... - that should be easier to do here than if we are "live"... - - */ - - rf_UnconfigureVnodes( raidPtr ); - - return (ret); -} - -static int -rf_AllocDiskStructures(raidPtr, cfgPtr) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - RF_RaidDisk_t **disks; - int ret; - int r; - - RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), - (RF_RaidDisk_t **), raidPtr->cleanupList); - if (disks == NULL) { - ret = ENOMEM; - goto fail; - } - raidPtr->Disks = disks; - /* get space for the device-specific stuff... */ - RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, - sizeof(struct raidcinfo *), (struct raidcinfo **), - raidPtr->cleanupList); - if (raidPtr->raid_cinfo == NULL) { - ret = ENOMEM; - goto fail; - } - - for (r = 0; r < raidPtr->numRow; r++) { - /* We allocate RF_MAXSPARE on the first row so that we - have room to do hot-swapping of spares */ - RF_CallocAndAdd(disks[r], raidPtr->numCol - + ((r == 0) ? RF_MAXSPARE : 0), - sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), - raidPtr->cleanupList); - if (disks[r] == NULL) { - ret = ENOMEM; - goto fail; - } - /* get more space for device specific stuff.. */ - RF_CallocAndAdd(raidPtr->raid_cinfo[r], - raidPtr->numCol + ((r == 0) ? 
raidPtr->numSpare : 0), - sizeof(struct raidcinfo), (struct raidcinfo *), - raidPtr->cleanupList); - if (raidPtr->raid_cinfo[r] == NULL) { - ret = ENOMEM; - goto fail; - } - } - return(0); -fail: - rf_UnconfigureVnodes( raidPtr ); - - return(ret); -} - - -/* configure a single disk during auto-configuration at boot */ -int -rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; - RF_AutoConfig_t *auto_config; -{ - RF_RaidDisk_t **disks; - RF_RaidDisk_t *diskPtr; - RF_RowCol_t r, c; - RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; - int bs, ret; - int numFailuresThisRow; - int force; - RF_AutoConfig_t *ac; - int parity_good; - int mod_counter; - int mod_counter_found; - - rf_printf(0, "Starting autoconfiguration of RAID set...\n"); - force = cfgPtr->force; - - ret = rf_AllocDiskStructures(raidPtr, cfgPtr); - if (ret) - goto fail; - - disks = raidPtr->Disks; - - /* assume the parity will be fine.. */ - parity_good = RF_RAID_CLEAN; - - /* Check for mod_counters that are too low */ - mod_counter_found = 0; - mod_counter = 0; - ac = auto_config; - while(ac!=NULL) { - if (mod_counter_found==0) { - mod_counter = ac->clabel->mod_counter; - mod_counter_found = 1; - } else { - if (ac->clabel->mod_counter > mod_counter) { - mod_counter = ac->clabel->mod_counter; - } - } - ac->flag = 0; /* clear the general purpose flag */ - ac = ac->next; - } - - bs = 0; - for (r = 0; r < raidPtr->numRow; r++) { - numFailuresThisRow = 0; - for (c = 0; c < raidPtr->numCol; c++) { - diskPtr = &disks[r][c]; - - /* find this row/col in the autoconfig */ - rf_printf(1, "Looking for %d,%d in autoconfig\n",r,c); - ac = auto_config; - while(ac!=NULL) { - if (ac->clabel==NULL) { - /* big-time bad news. */ - goto fail; - } - if ((ac->clabel->row == r) && - (ac->clabel->column == c) && - (ac->clabel->mod_counter == mod_counter)) { - /* it's this one... */ - /* flag it as 'used', so we don't - free it later. 
*/ - ac->flag = 1; - rf_printf(1, "Found: %s at %d,%d\n", - ac->devname, r, c); - break; - } - ac=ac->next; - } - - if (ac==NULL) { - /* we didn't find an exact match with a - correct mod_counter above... can we - find one with an incorrect mod_counter - to use instead? (this one, if we find - it, will be marked as failed once the - set configures) - */ - - ac = auto_config; - while(ac!=NULL) { - if (ac->clabel==NULL) { - /* big-time bad news. */ - goto fail; - } - if ((ac->clabel->row == r) && - (ac->clabel->column == c)) { - /* it's this one... - flag it as 'used', so we - don't free it later. */ - ac->flag = 1; - rf_printf(1, "Found(low mod_counter): %s at %d,%d\n", - ac->devname,r,c); - - break; - } - ac=ac->next; - } - } - - - - if (ac!=NULL) { - /* Found it. Configure it.. */ - diskPtr->blockSize = ac->clabel->blockSize; - diskPtr->numBlocks = ac->clabel->numBlocks; - /* Note: rf_protectedSectors is already - factored into numBlocks here */ - raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; - raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; - - memcpy(&raidPtr->raid_cinfo[r][c].ci_label, - ac->clabel, sizeof(*ac->clabel)); - sprintf(diskPtr->devname, "/dev/%s", - ac->devname); - - /* note the fact that this component was - autoconfigured. You'll need this info - later. Trust me :) */ - diskPtr->auto_configured = 1; - diskPtr->dev = ac->dev; - - /* - * we allow the user to specify that - * only a fraction of the disks should - * be used this is just for debug: it - * speeds up the parity scan - */ - - diskPtr->numBlocks = diskPtr->numBlocks * - rf_sizePercentage / 100; - - /* XXX these will get set multiple times, - but since we're autoconfiguring, they'd - better be always the same each time! 
- If not, this is the least of your worries */ - - bs = diskPtr->blockSize; - min_numblks = diskPtr->numBlocks; - - /* this gets done multiple times, but that's - fine -- the serial number will be the same - for all components, guaranteed */ - raidPtr->serial_number = - ac->clabel->serial_number; - /* check the last time the label - was modified */ - if (ac->clabel->mod_counter != - mod_counter) { - /* Even though we've filled in all - of the above, we don't trust - this component since it's - modification counter is not - in sync with the rest, and we really - consider it to be failed. */ - disks[r][c].status = rf_ds_failed; - numFailuresThisRow++; - } else { - if (ac->clabel->clean != - RF_RAID_CLEAN) { - parity_good = RF_RAID_DIRTY; - } - } - } else { - /* Didn't find it at all!! - Component must really be dead */ - disks[r][c].status = rf_ds_failed; - sprintf(disks[r][c].devname,"component%d", - r * raidPtr->numCol + c); - numFailuresThisRow++; - } - } - /* XXX fix for n-fault tolerant */ - /* XXX this should probably check to see how many failures - we can handle for this configuration! 
*/ - if (numFailuresThisRow > 0) - raidPtr->status[r] = rf_rs_degraded; - } - - /* close the device for the ones that didn't get used */ - - ac = auto_config; - while(ac!=NULL) { - if (ac->flag == 0) { -#if defined(__NetBSD__) - vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); -#elif defined(__FreeBSD__) - vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY, - raidPtr->engine_thread); -#endif - VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0); - vput(ac->vp); - ac->vp = NULL; - rf_printf(1, "Released %s from auto-config set.\n", - ac->devname); - } - ac = ac->next; - } - - raidPtr->mod_counter = mod_counter; - - /* note the state of the parity, if any */ - raidPtr->parity_good = parity_good; - raidPtr->sectorsPerDisk = min_numblks; - raidPtr->logBytesPerSector = ffs(bs) - 1; - raidPtr->bytesPerSector = bs; - raidPtr->sectorMask = bs - 1; - return (0); - -fail: - - rf_UnconfigureVnodes( raidPtr ); - - return (ret); - -} - -/* configure a single disk in the array */ -int -rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) - RF_Raid_t *raidPtr; - char *buf; - RF_RaidDisk_t *diskPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - char *p; - int retcode; - - int error; - - retcode = 0; - p = rf_find_non_white(buf); - if (p[strlen(p) - 1] == '\n') { - /* strip off the newline */ - p[strlen(p) - 1] = '\0'; - } - (void) strcpy(diskPtr->devname, p); - - /* Let's start by claiming the component is fine and well... */ - diskPtr->status = rf_ds_optimal; - - raidPtr->raid_cinfo[row][col].ci_vp = NULL; - raidPtr->raid_cinfo[row][col].ci_dev = NULL; - - error = raid_getcomponentsize(raidPtr, row, col); - if (error) { - printf("raidlookup on device: %s failed!\n", diskPtr->devname); - if (error == ENXIO) { - /* the component isn't there... 
must be dead :-( */ - diskPtr->status = rf_ds_failed; - return (error); - } - } - return (0); -} - -static void -rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) - RF_Raid_t *raidPtr; - int row; - int column; - char *dev_name; - RF_ComponentLabel_t *ci_label; -{ - - printf("raid%d: Component %s being configured at row: %d col: %d\n", - raidPtr->raidid, dev_name, row, column ); - printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", - ci_label->row, ci_label->column, - ci_label->num_rows, ci_label->num_columns); - printf(" Version: %d Serial Number: %d Mod Counter: %d\n", - ci_label->version, ci_label->serial_number, - ci_label->mod_counter); - printf(" Clean: %s Status: %d\n", - ci_label->clean ? "Yes" : "No", ci_label->status ); -} - -static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, - serial_number, mod_counter ) - RF_Raid_t *raidPtr; - int row; - int column; - char *dev_name; - RF_ComponentLabel_t *ci_label; - int serial_number; - int mod_counter; -{ - int fatal_error = 0; - - if (serial_number != ci_label->serial_number) { - printf("%s has a different serial number: %d %d\n", - dev_name, serial_number, ci_label->serial_number); - fatal_error = 1; - } - if (mod_counter != ci_label->mod_counter) { - printf("%s has a different modfication count: %d %d\n", - dev_name, mod_counter, ci_label->mod_counter); - } - - if (row != ci_label->row) { - printf("Row out of alignment for: %s\n", dev_name); - fatal_error = 1; - } - if (column != ci_label->column) { - printf("Column out of alignment for: %s\n", dev_name); - fatal_error = 1; - } - if (raidPtr->numRow != ci_label->num_rows) { - printf("Number of rows do not match for: %s\n", dev_name); - fatal_error = 1; - } - if (raidPtr->numCol != ci_label->num_columns) { - printf("Number of columns do not match for: %s\n", dev_name); - fatal_error = 1; - } - if (ci_label->clean == 0) { - /* it's not clean, but that's not fatal */ - printf("%s is not clean!\n", dev_name); - } - 
return(fatal_error); -} - - -/* - - rf_CheckLabels() - check all the component labels for consistency. - Return an error if there is anything major amiss. - - */ - -int -rf_CheckLabels( raidPtr, cfgPtr ) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - int r,c; - char *dev_name; - RF_ComponentLabel_t *ci_label; - int serial_number = 0; - int mod_number = 0; - int fatal_error = 0; - int mod_values[4]; - int mod_count[4]; - int ser_values[4]; - int ser_count[4]; - int num_ser; - int num_mod; - int i; - int found; - int hosed_row; - int hosed_column; - int too_fatal; - int parity_good; - int force; - - hosed_row = -1; - hosed_column = -1; - too_fatal = 0; - force = cfgPtr->force; - - /* - We're going to try to be a little intelligent here. If one - component's label is bogus, and we can identify that it's the - *only* one that's gone, we'll mark it as "failed" and allow - the configuration to proceed. This will be the *only* case - that we'll proceed if there would be (otherwise) fatal errors. - - Basically we simply keep a count of how many components had - what serial number. If all but one agree, we simply mark - the disagreeing component as being failed, and allow - things to come up "normally". - - We do this first for serial numbers, and then for "mod_counter". 
- - */ - - num_ser = 0; - num_mod = 0; - for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - found=0; - for(i=0;i<num_ser;i++) { - if (ser_values[i] == ci_label->serial_number) { - ser_count[i]++; - found=1; - break; - } - } - if (!found) { - ser_values[num_ser] = ci_label->serial_number; - ser_count[num_ser] = 1; - num_ser++; - if (num_ser>2) { - fatal_error = 1; - break; - } - } - found=0; - for(i=0;i<num_mod;i++) { - if (mod_values[i] == ci_label->mod_counter) { - mod_count[i]++; - found=1; - break; - } - } - if (!found) { - mod_values[num_mod] = ci_label->mod_counter; - mod_count[num_mod] = 1; - num_mod++; - if (num_mod>2) { - fatal_error = 1; - break; - } - } - } - } - rf_printf(1, "raid%d: Summary of serial numbers:\n", raidPtr->raidid); - for(i=0;i<num_ser;i++) { - rf_printf(1, "%d %d\n", ser_values[i], ser_count[i]); - } - rf_printf(1, "raid%d: Summary of mod counters:\n", raidPtr->raidid); - for(i=0;i<num_mod;i++) { - rf_printf(1, "%d %d\n", mod_values[i], mod_count[i]); - } - serial_number = ser_values[0]; - if (num_ser == 2) { - if ((ser_count[0] == 1) || (ser_count[1] == 1)) { - /* Locate the maverick component */ - if (ser_count[1] > ser_count[0]) { - serial_number = ser_values[1]; - } - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - if (serial_number != - ci_label->serial_number) { - hosed_row = r; - hosed_column = c; - break; - } - } - } - printf("Hosed component: %s\n", - &cfgPtr->devnames[hosed_row][hosed_column][0]); - if (!force) { - /* we'll fail this component, as if there are - other major errors, we arn't forcing things - and we'll abort the config anyways */ - raidPtr->Disks[hosed_row][hosed_column].status - = rf_ds_failed; - raidPtr->numFailures++; - raidPtr->status[hosed_row] = rf_rs_degraded; - } - } else { - too_fatal = 1; - } - if (cfgPtr->parityConfig 
== '0') { - /* We've identified two different serial numbers. - RAID 0 can't cope with that, so we'll punt */ - too_fatal = 1; - } - - } - - /* record the serial number for later. If we bail later, setting - this doesn't matter, otherwise we've got the best guess at the - correct serial number */ - raidPtr->serial_number = serial_number; - - mod_number = mod_values[0]; - if (num_mod == 2) { - if ((mod_count[0] == 1) || (mod_count[1] == 1)) { - /* Locate the maverick component */ - if (mod_count[1] > mod_count[0]) { - mod_number = mod_values[1]; - } else if (mod_count[1] < mod_count[0]) { - mod_number = mod_values[0]; - } else { - /* counts of different modification values - are the same. Assume greater value is - the correct one, all other things - considered */ - if (mod_values[0] > mod_values[1]) { - mod_number = mod_values[0]; - } else { - mod_number = mod_values[1]; - } - - } - for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - if (mod_number != - ci_label->mod_counter) { - if ( ( hosed_row == r ) && - ( hosed_column == c )) { - /* same one. Can - deal with it. */ - } else { - hosed_row = r; - hosed_column = c; - if (num_ser != 1) { - too_fatal = 1; - break; - } - } - } - } - } - printf("Hosed component: %s\n", - &cfgPtr->devnames[hosed_row][hosed_column][0]); - if (!force) { - /* we'll fail this component, as if there are - other major errors, we arn't forcing things - and we'll abort the config anyways */ - if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { - raidPtr->Disks[hosed_row][hosed_column].status - = rf_ds_failed; - raidPtr->numFailures++; - raidPtr->status[hosed_row] = rf_rs_degraded; - } - } - } else { - too_fatal = 1; - } - if (cfgPtr->parityConfig == '0') { - /* We've identified two different mod counters. 
- RAID 0 can't cope with that, so we'll punt */ - too_fatal = 1; - } - } - - raidPtr->mod_counter = mod_number; - - if (too_fatal) { - /* we've had both a serial number mismatch, and a mod_counter - mismatch -- and they involved two different components!! - Bail -- make things fail so that the user must force - the issue... */ - hosed_row = -1; - hosed_column = -1; - } - - if (num_ser > 2) { - printf("raid%d: Too many different serial numbers!\n", - raidPtr->raidid); - } - - if (num_mod > 2) { - printf("raid%d: Too many different mod counters!\n", - raidPtr->raidid); - } - - /* we start by assuming the parity will be good, and flee from - that notion at the slightest sign of trouble */ - - parity_good = RF_RAID_CLEAN; - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - dev_name = &cfgPtr->devnames[r][c][0]; - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - - if ((r == hosed_row) && (c == hosed_column)) { - printf("raid%d: Ignoring %s\n", - raidPtr->raidid, dev_name); - } else { - rf_print_label_status( raidPtr, r, c, - dev_name, ci_label ); - if (rf_check_label_vitals( raidPtr, r, c, - dev_name, ci_label, - serial_number, - mod_number )) { - fatal_error = 1; - } - if (ci_label->clean != RF_RAID_CLEAN) { - parity_good = RF_RAID_DIRTY; - } - } - } - } - if (fatal_error) { - parity_good = RF_RAID_DIRTY; - } - - /* we note the state of the parity */ - raidPtr->parity_good = parity_good; - - return(fatal_error); -} - -int -rf_add_hot_spare(raidPtr, sparePtr) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *sparePtr; -{ - RF_RaidDisk_t *disks; - RF_DiskQueue_t *spareQueues; - int ret; - unsigned int bs; - int spare_number; - -#if 0 - printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); - printf("Num col: %d\n",raidPtr->numCol); -#endif - if (raidPtr->numSpare >= RF_MAXSPARE) { - RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); - return(EINVAL); - } - - RF_LOCK_MUTEX(raidPtr->mutex); - - /* the beginning of the spares... 
*/ - disks = &raidPtr->Disks[0][raidPtr->numCol]; - - spare_number = raidPtr->numSpare; - - ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, - &disks[spare_number], 0, - raidPtr->numCol + spare_number); - - if (ret) - goto fail; - if (disks[spare_number].status != rf_ds_optimal) { - RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", - sparePtr->component_name); - ret=EINVAL; - goto fail; - } else { - disks[spare_number].status = rf_ds_spare; - DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, - disks[spare_number].devname, - (long int) disks[spare_number].numBlocks, - disks[spare_number].blockSize, - (long int) disks[spare_number].numBlocks * - disks[spare_number].blockSize / 1024 / 1024); - } - - - /* check sizes and block sizes on the spare disk */ - bs = 1 << raidPtr->logBytesPerSector; - if (disks[spare_number].blockSize != bs) { - RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); - ret = EINVAL; - goto fail; - } - if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { - RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", - disks[spare_number].devname, - disks[spare_number].blockSize, - (long int) raidPtr->sectorsPerDisk); - ret = EINVAL; - goto fail; - } else { - if (disks[spare_number].numBlocks > - raidPtr->sectorsPerDisk) { - RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, - (long int) raidPtr->sectorsPerDisk); - - disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; - } - } - - spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; - ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], - 0, raidPtr->numCol + spare_number, - raidPtr->qType, - raidPtr->sectorsPerDisk, - raidPtr->Disks[0][raidPtr->numCol + - spare_number].dev, - raidPtr->maxOutstanding, - &raidPtr->shutdownList, - 
raidPtr->cleanupList); - - - raidPtr->numSpare++; - RF_UNLOCK_MUTEX(raidPtr->mutex); - return (0); - -fail: - RF_UNLOCK_MUTEX(raidPtr->mutex); - return(ret); -} - -int -rf_remove_hot_spare(raidPtr,sparePtr) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *sparePtr; -{ - int spare_number; - - - if (raidPtr->numSpare==0) { - printf("No spares to remove!\n"); - return(EINVAL); - } - - spare_number = sparePtr->column; - - return(EINVAL); /* XXX not implemented yet */ -#if 0 - if (spare_number < 0 || spare_number > raidPtr->numSpare) { - return(EINVAL); - } - - /* verify that this spare isn't in use... */ - - - - - /* it's gone.. */ - - raidPtr->numSpare--; - - return(0); -#endif -} - - -int -rf_delete_component(raidPtr,component) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *component; -{ - RF_RaidDisk_t *disks; - - if ((component->row < 0) || - (component->row >= raidPtr->numRow) || - (component->column < 0) || - (component->column >= raidPtr->numCol)) { - return(EINVAL); - } - - disks = &raidPtr->Disks[component->row][component->column]; - - /* 1. This component must be marked as 'failed' */ - - return(EINVAL); /* Not implemented yet. */ -} - -int -rf_incorporate_hot_spare(raidPtr,component) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *component; -{ - - /* Issues here include how to 'move' this in if there is IO - taking place (e.g. component queues and such) */ - - return(EINVAL); /* Not implemented yet. */ -} diff --git a/sys/dev/raidframe/rf_disks.h b/sys/dev/raidframe/rf_disks.h deleted file mode 100644 index b57c4f8..0000000 --- a/sys/dev/raidframe/rf_disks.h +++ /dev/null @@ -1,108 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_disks.h,v 1.8 2000/03/27 03:25:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_disks.h -- header file for code related to physical disks - */ - -#ifndef _RF__RF_DISKS_H_ -#define _RF__RF_DISKS_H_ - -#include <sys/types.h> - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_bsd.h> - -/* - * A physical disk can be in one of several states: - * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW. 
- */ -enum RF_DiskStatus_e { - rf_ds_optimal, /* no problems */ - rf_ds_failed, /* reconstruction ongoing */ - rf_ds_reconstructing, /* reconstruction complete to spare, dead disk - * not yet replaced */ - rf_ds_dist_spared, /* reconstruction complete to distributed - * spare space, dead disk not yet replaced */ - rf_ds_spared, /* reconstruction complete to distributed - * spare space, dead disk not yet replaced */ - rf_ds_spare, /* an available spare disk */ - rf_ds_used_spare /* a spare which has been used, and hence is - * not available */ -}; -typedef enum RF_DiskStatus_e RF_DiskStatus_t; - -struct RF_RaidDisk_s { - char devname[56]; /* name of device file */ - RF_DiskStatus_t status; /* whether it is up or down */ - RF_RowCol_t spareRow; /* if in status "spared", this identifies the - * spare disk */ - RF_RowCol_t spareCol; /* if in status "spared", this identifies the - * spare disk */ - RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ - * CAPACITY */ - int blockSize; - RF_SectorCount_t partitionSize; /* The *actual* and *full* size of - the partition, from the disklabel */ - int auto_configured;/* 1 if this component was autoconfigured. - 0 otherwise. */ - dev_t dev; -}; -/* - * An RF_DiskOp_t ptr is really a pointer to a UAGT_CCB, but I want - * to isolate the cam layer from all other layers, so I typecast to/from - * RF_DiskOp_t * (i.e. void *) at the interfaces. 
- */ -typedef void RF_DiskOp_t; - -/* if a disk is in any of these states, it is inaccessible */ -#define RF_DEAD_DISK(_dstat_) (((_dstat_) == rf_ds_spared) || \ - ((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \ - ((_dstat_) == rf_ds_dist_spared)) - -#ifdef _KERNEL -#include <dev/raidframe/rf_bsd.h> - -int rf_ConfigureDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_ConfigureDisk(RF_Raid_t * raidPtr, char *buf, RF_RaidDisk_t * diskPtr, - RF_RowCol_t row, RF_RowCol_t col); -int rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, - RF_AutoConfig_t *auto_config); -int rf_CheckLabels( RF_Raid_t *, RF_Config_t *); -int rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component); -int rf_incorporate_hot_spare(RF_Raid_t *raidPtr, - RF_SingleComponent_t *component); -#endif /* _KERNEL */ -#endif /* !_RF__RF_DISKS_H_ */ diff --git a/sys/dev/raidframe/rf_driver.c b/sys/dev/raidframe/rf_driver.c deleted file mode 100644 index 9534132..0000000 --- a/sys/dev/raidframe/rf_driver.c +++ /dev/null @@ -1,1050 +0,0 @@ -/* $NetBSD: rf_driver.c,v 1.39 2000/12/15 02:12:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/*- - * Copyright (c) 1999 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. 
Courtright II, - * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * - * rf_driver.c -- main setup, teardown, and access routines for the RAID driver - * - * all routines are prefixed with rf_ (raidframe), to avoid conficts. 
- * - ******************************************************************************/ - - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_threadstuff.h> - -#include <sys/errno.h> - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_aselect.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_states.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_callback.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_nwayxor.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_copyback.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif - -#include <sys/buf.h> - -/* rad == RF_RaidAccessDesc_t */ -static RF_FreeList_t *rf_rad_freelist; -#define RF_MAX_FREE_RAD 128 -#define RF_RAD_INC 16 -#define RF_RAD_INITIAL 32 - -/* debug variables */ -char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ - -/* main configuration routines */ -static int 
raidframe_booted = 0; - -static void rf_ConfigureDebug(RF_Config_t * cfgPtr); -static void set_debug_option(char *name, long val); -static void rf_UnconfigureArray(void); -static int init_rad(RF_RaidAccessDesc_t *); -static void clean_rad(RF_RaidAccessDesc_t *); -static void rf_ShutdownRDFreeList(void *); -static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); - -RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved - * printfs by different stripes */ - -#define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended)) -#define WAIT_FOR_QUIESCENCE(_raid_) \ - RF_LTSLEEP(&((_raid_)->accesses_suspended), PRIBIO, \ - "raidframe quiesce", 0, &((_raid_)->access_suspend_mutex)) - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#define IO_BUF_ERR(bp, err) { \ - bp->bio_flags |= BIO_ERROR; \ - bp->bio_resid = bp->bio_bcount; \ - bp->bio_error = err; \ - biodone(bp); \ -}; -#else -#define IO_BUF_ERR(bp, err) { \ - bp->b_flags |= B_ERROR; \ - bp->b_resid = bp->b_bcount; \ - bp->b_error = err; \ - biodone(bp); \ -} -#endif - -static int configureCount = 0; /* number of active configurations */ -static int configInProgress = 0; /* configuration is in progress and code - * needs to be serialized. 
*/ -static int isconfigged = 0; /* is basic raidframe (non per-array) - * stuff configged */ -RF_DECLARE_STATIC_MUTEX(configureMutex) /* used to lock the configuration - * stuff */ -static RF_ShutdownList_t *globalShutdown; /* non array-specific - * stuff */ - -/* called at system boot time */ -int -rf_BootRaidframe() -{ - int rc; - - if (raidframe_booted) - return (EBUSY); - raidframe_booted = 1; - - rc = rf_mutex_init(&configureMutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_PANIC(); - } - configureCount = 0; - isconfigged = 0; - globalShutdown = NULL; - return (0); -} -/* - * This function is really just for debugging user-level stuff: it - * frees up all memory, other RAIDframe resources which might otherwise - * be kept around. This is used with systems like "sentinel" to detect - * memory leaks. - */ -int -rf_UnbootRaidframe() -{ - int rc; - - RF_LOCK_MUTEX(configureMutex); - if (configureCount) { - RF_UNLOCK_MUTEX(configureMutex); - return (EBUSY); - } - raidframe_booted = 0; - RF_UNLOCK_MUTEX(configureMutex); - rc = rf_mutex_destroy(&configureMutex); - if (rc) { - RF_ERRORMSG3("Unable to destroy mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_PANIC(); - } - return (0); -} -/* - * Called whenever an array is shutdown - */ -static void -rf_UnconfigureArray() -{ - int rc; - - RF_LOCK_MUTEX(configureMutex); - if (--configureCount == 0) { /* if no active configurations, shut - * everything down */ - isconfigged = 0; - - rc = rf_ShutdownList(&globalShutdown); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc); - } - - /* - * We must wait until now, because the AllocList module - * uses the DebugMem module. - */ - if (rf_memDebug) - rf_print_unfreed(); - } - RF_UNLOCK_MUTEX(configureMutex); -} - -/* - * Called to shut down an array. 
- */ -int -rf_Shutdown(raidPtr) - RF_Raid_t *raidPtr; -{ - - if (!raidPtr->valid) { - RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); - return (EINVAL); - } - /* - * wait for outstanding IOs to land - * As described in rf_raid.h, we use the rad_freelist lock - * to protect the per-array info about outstanding descs - * since we need to do freelist locking anyway, and this - * cuts down on the amount of serialization we've got going - * on. - */ - RF_FREELIST_DO_LOCK(rf_rad_freelist); - if (raidPtr->waitShutdown) { - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - return (EBUSY); - } - raidPtr->waitShutdown = 1; - while (raidPtr->nAccOutstanding) { - RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist)); - } - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - - /* Wait for any parity re-writes to stop... */ - while (raidPtr->parity_rewrite_in_progress) { - printf("Waiting for parity re-write to exit...\n"); - tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO, - "rfprwshutdown", 0); - } - - raidPtr->valid = 0; - - rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE); - - rf_UnconfigureVnodes(raidPtr); - - rf_ShutdownList(&raidPtr->shutdownList); - - rf_UnconfigureArray(); - - return (0); -} - - -#define DO_INIT_CONFIGURE(f) { \ - rc = f (&globalShutdown); \ - if (rc) { \ - RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ - rf_ShutdownList(&globalShutdown); \ - RF_LOCK_MUTEX(configureMutex); \ - configInProgress = 0; \ - configureCount--; \ - RF_UNLOCK_MUTEX(configureMutex); \ - return(rc); \ - } \ -} - -#define DO_RAID_FAIL() { \ - rf_UnconfigureVnodes(raidPtr); \ - rf_ShutdownList(&raidPtr->shutdownList); \ - rf_UnconfigureArray(); \ -} - -#define DO_RAID_INIT_CONFIGURE(f) { \ - rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \ - if (rc) { \ - RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -#define DO_RAID_MUTEX(_m_) 
{ \ - rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_)); \ - if (rc) { \ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", \ - __FILE__, __LINE__, rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -#define DO_RAID_COND(_c_) { \ - rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_)); \ - if (rc) { \ - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", \ - __FILE__, __LINE__, rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -int -rf_Configure(raidPtr, cfgPtr, ac) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; - RF_AutoConfig_t *ac; -{ - RF_RowCol_t row, col; - int i, rc; - - /* XXX This check can probably be removed now, since - RAIDFRAME_CONFIGURE now checks to make sure that the - RAID set is not already valid - */ - if (raidPtr->valid) { - RF_ERRORMSG("RAIDframe configuration not shut down. Aborting configure.\n"); - return (EINVAL); - } - RF_LOCK_MUTEX(configureMutex); - if (configInProgress == 1) { - RF_UNLOCK_MUTEX(configureMutex); - return (EBUSY); - } - configureCount++; - if (isconfigged == 0) { - configInProgress = 1; - RF_UNLOCK_MUTEX(configureMutex); - rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownList(&globalShutdown); - return (rc); - } - /* initialize globals */ - printf("RAIDFRAME: protectedSectors is %ld\n", - rf_protectedSectors); - - rf_clear_debug_print_buffer(); - - DO_INIT_CONFIGURE(rf_ConfigureAllocList); - - /* - * Yes, this does make debugging general to the whole - * system instead of being array specific. Bummer, drag. 
- */ - rf_ConfigureDebug(cfgPtr); - DO_INIT_CONFIGURE(rf_ConfigureDebugMem); - DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); - DO_INIT_CONFIGURE(rf_ConfigureMapModule); - DO_INIT_CONFIGURE(rf_ConfigureReconEvent); - DO_INIT_CONFIGURE(rf_ConfigureCallback); - DO_INIT_CONFIGURE(rf_ConfigureMemChunk); - DO_INIT_CONFIGURE(rf_ConfigureRDFreeList); - DO_INIT_CONFIGURE(rf_ConfigureNWayXor); - DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList); - DO_INIT_CONFIGURE(rf_ConfigureMCPair); - DO_INIT_CONFIGURE(rf_ConfigureDAGs); - DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs); - DO_INIT_CONFIGURE(rf_ConfigureDebugPrint); - DO_INIT_CONFIGURE(rf_ConfigureReconstruction); - DO_INIT_CONFIGURE(rf_ConfigureCopyback); - DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem); - - RF_LOCK_MUTEX(configureMutex); - isconfigged = 1; - configInProgress = 0; - } - RF_UNLOCK_MUTEX(configureMutex); - - DO_RAID_MUTEX(&raidPtr->mutex); - /* set up the cleanup list. Do this after ConfigureDebug so that - * value of memDebug will be set */ - - rf_MakeAllocList(raidPtr->cleanupList); - if (raidPtr->cleanupList == NULL) { - DO_RAID_FAIL(); - return (ENOMEM); - } - rc = rf_ShutdownCreate(&raidPtr->shutdownList, - (void (*) (void *)) rf_FreeAllocList, - raidPtr->cleanupList); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - DO_RAID_FAIL(); - return (rc); - } - raidPtr->numRow = cfgPtr->numRow; - raidPtr->numCol = cfgPtr->numCol; - raidPtr->numSpare = cfgPtr->numSpare; - - /* XXX we don't even pretend to support more than one row in the - * kernel... 
*/ - if (raidPtr->numRow != 1) { - RF_ERRORMSG("Only one row supported in kernel.\n"); - DO_RAID_FAIL(); - return (EINVAL); - } - RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, sizeof(RF_RowStatus_t), - (RF_RowStatus_t *), raidPtr->cleanupList); - if (raidPtr->status == NULL) { - DO_RAID_FAIL(); - return (ENOMEM); - } - RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow, - sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList); - if (raidPtr->reconControl == NULL) { - DO_RAID_FAIL(); - return (ENOMEM); - } - for (i = 0; i < raidPtr->numRow; i++) { - raidPtr->status[i] = rf_rs_optimal; - raidPtr->reconControl[i] = NULL; - } - - DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine); - DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks); - - DO_RAID_COND(&raidPtr->outstandingCond); - - raidPtr->nAccOutstanding = 0; - raidPtr->waitShutdown = 0; - - DO_RAID_MUTEX(&raidPtr->access_suspend_mutex); - DO_RAID_COND(&raidPtr->quiescent_cond); - - DO_RAID_COND(&raidPtr->waitForReconCond); - - DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex); - - if (ac!=NULL) { - /* We have an AutoConfig structure.. Don't do the - normal disk configuration... call the auto config - stuff */ - rf_AutoConfigureDisks(raidPtr, cfgPtr, ac); - } else { - DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); - DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); - } - /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev - * no. 
is set */ - DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); - - DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); - - DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus); - - for (row = 0; row < raidPtr->numRow; row++) { - for (col = 0; col < raidPtr->numCol; col++) { - /* - * XXX better distribution - */ - raidPtr->hist_diskreq[row][col] = 0; - } - } - - raidPtr->numNewFailures = 0; - raidPtr->copyback_in_progress = 0; - raidPtr->parity_rewrite_in_progress = 0; - raidPtr->recon_in_progress = 0; - raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs; - - /* autoconfigure and root_partition will actually get filled in - after the config is done */ - raidPtr->autoconfigure = 0; - raidPtr->root_partition = 0; - raidPtr->last_unit = raidPtr->raidid; - raidPtr->config_order = 0; - - if (rf_keepAccTotals) { - raidPtr->keep_acc_totals = 1; - } - rf_StartUserStats(raidPtr); - - raidPtr->valid = 1; - return (0); -} - -static int -init_rad(desc) - RF_RaidAccessDesc_t *desc; -{ - int rc; - - rc = rf_mutex_init(&desc->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rc = rf_cond_init(&desc->cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&desc->mutex); - return (rc); - } - return (0); -} - -static void -clean_rad(desc) - RF_RaidAccessDesc_t *desc; -{ - rf_mutex_destroy(&desc->mutex); - rf_cond_destroy(&desc->cond); -} - -static void -rf_ShutdownRDFreeList(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next, (RF_RaidAccessDesc_t *), clean_rad); -} - -static int -rf_ConfigureRDFreeList(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD, - RF_RAD_INC, sizeof(RF_RaidAccessDesc_t)); - if (rf_rad_freelist == NULL) { - return (ENOMEM); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add 
to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownRDFreeList(NULL); - return (rc); - } - RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next, - (RF_RaidAccessDesc_t *), init_rad); - return (0); -} - -RF_RaidAccessDesc_t * -rf_AllocRaidAccDesc( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp, - RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - void (*cbF) (RF_Buf_t), - void *cbA, - RF_AccessState_t * states) -{ - RF_RaidAccessDesc_t *desc; - - RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next, (RF_RaidAccessDesc_t *), init_rad); - if (raidPtr->waitShutdown) { - /* - * Actually, we're shutting the array down. Free the desc - * and return NULL. - */ - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next, clean_rad); - return (NULL); - } - raidPtr->nAccOutstanding++; - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - - desc->raidPtr = (void *) raidPtr; - desc->type = type; - desc->raidAddress = raidAddress; - desc->numBlocks = numBlocks; - desc->bufPtr = bufPtr; - desc->bp = bp; - desc->paramDAG = paramDAG; - desc->paramASM = paramASM; - desc->flags = flags; - desc->states = states; - desc->state = 0; - - desc->status = 0; - bzero((char *) &desc->tracerec, sizeof(RF_AccTraceEntry_t)); - desc->callbackFunc = (void (*) (RF_CBParam_t)) cbF; /* XXX */ - desc->callbackArg = cbA; - desc->next = NULL; - desc->head = desc; - desc->numPending = 0; - desc->cleanupList = NULL; - rf_MakeAllocList(desc->cleanupList); - return (desc); -} - -void -rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr = desc->raidPtr; - - RF_ASSERT(desc); - - rf_FreeAllocList(desc->cleanupList); - RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, clean_rad); - raidPtr->nAccOutstanding--; - if (raidPtr->waitShutdown) { - 
RF_SIGNAL_COND(raidPtr->outstandingCond); - } - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); -} -/********************************************************************* - * Main routine for performing an access. - * Accesses are retried until a DAG can not be selected. This occurs - * when either the DAG library is incomplete or there are too many - * failures in a parity group. - ********************************************************************/ -int -rf_DoAccess( - RF_Raid_t * raidPtr, - RF_IoType_t type, - int async_flag, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp_in, - RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t ** paramDesc, - void (*cbF) (RF_Buf_t), - void *cbA) -/* -type should be read or write -async_flag should be RF_TRUE or RF_FALSE -bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel -*/ -{ - RF_RaidAccessDesc_t *desc; - caddr_t lbufPtr = bufPtr; - RF_Buf_t bp = (RF_Buf_t) bp_in; - - raidAddress += rf_raidSectorOffset; - - if (!raidPtr->valid) { - RF_ERRORMSG("RAIDframe driver not successfully configured. Rejecting access.\n"); - IO_BUF_ERR(bp, EINVAL); - return (EINVAL); - } - - if (rf_accessDebug) { - - printf("logBytes is: %d %d %d\n", raidPtr->raidid, - raidPtr->logBytesPerSector, - (int) rf_RaidAddressToByte(raidPtr, numBlocks)); - printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid, - (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress, - (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), - (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1), - (int) numBlocks, - (int) rf_RaidAddressToByte(raidPtr, numBlocks), - (long) bufPtr); - } - if (raidAddress + numBlocks > raidPtr->totalSectors) { - - printf("DoAccess: raid addr %lu too large to access %lu sectors. 
Max legal addr is %lu\n", - (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors); - - IO_BUF_ERR(bp, ENOSPC); - return (ENOSPC); - } - desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress, - numBlocks, lbufPtr, bp, paramDAG, paramASM, - flags, cbF, cbA, raidPtr->Layout.map->states); - - if (desc == NULL) { - return (ENOMEM); - } - RF_ETIMER_START(desc->tracerec.tot_timer); - - desc->async_flag = async_flag; - - rf_ContinueRaidAccess(desc); - - return (0); -} -/* force the array into reconfigured mode without doing reconstruction */ -int -rf_SetReconfiguredMode(raidPtr, row, col) - RF_Raid_t *raidPtr; - int row; - int col; -{ - if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - printf("Can't set reconfigured mode in dedicated-spare array\n"); - RF_PANIC(); - } - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures++; - raidPtr->Disks[row][col].status = rf_ds_dist_spared; - raidPtr->status[row] = rf_rs_reconfigured; - rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); - /* install spare table only if declustering + distributed sparing - * architecture. */ - if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) - rf_InstallSpareTable(raidPtr, row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - return (0); -} - -extern int fail_row, fail_col, fail_time; -extern int delayed_recon; - -int -rf_FailDisk( - RF_Raid_t * raidPtr, - int frow, - int fcol, - int initRecon) -{ - printf("raid%d: Failing disk r%d c%d\n", raidPtr->raidid, frow, fcol); - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures++; - raidPtr->Disks[frow][fcol].status = rf_ds_failed; - raidPtr->status[frow] = rf_rs_degraded; - rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (initRecon) - rf_ReconstructFailedDisk(raidPtr, frow, fcol); - return (0); -} -/* releases a thread that is waiting for the array to become quiesced. 
- * access_suspend_mutex should be locked upon calling this - */ -void -rf_SignalQuiescenceLock(raidPtr, reconDesc) - RF_Raid_t *raidPtr; - RF_RaidReconDesc_t *reconDesc; -{ - if (rf_quiesceDebug) { - printf("raid%d: Signalling quiescence lock\n", - raidPtr->raidid); - } - raidPtr->access_suspend_release = 1; - - if (raidPtr->waiting_for_quiescence) { - SIGNAL_QUIESCENT_COND(raidPtr); - } -} -/* suspends all new requests to the array. No effect on accesses that are in flight. */ -int -rf_SuspendNewRequestsAndWait(raidPtr) - RF_Raid_t *raidPtr; -{ - if (rf_quiesceDebug) - printf("Suspending new reqs\n"); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accesses_suspended++; - raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1; - - if (raidPtr->waiting_for_quiescence) { - raidPtr->access_suspend_release = 0; - while (!raidPtr->access_suspend_release) { - printf("Suspending: Waiting for Quiescence\n"); - WAIT_FOR_QUIESCENCE(raidPtr); - raidPtr->waiting_for_quiescence = 0; - } - } - printf("Quiescence reached..\n"); - - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - return (raidPtr->waiting_for_quiescence); -} -/* wake up everyone waiting for quiescence to be released */ -void -rf_ResumeNewRequests(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_CallbackDesc_t *t, *cb; - - if (rf_quiesceDebug) - printf("Resuming new reqs\n"); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accesses_suspended--; - if (raidPtr->accesses_suspended == 0) - cb = raidPtr->quiesce_wait_list; - else - cb = NULL; - raidPtr->quiesce_wait_list = NULL; - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - while (cb) { - t = cb; - cb = cb->next; - (t->callbackFunc) (t->callbackArg); - rf_FreeCallbackDesc(t); - } -} -/***************************************************************************************** - * - * debug routines - * - ****************************************************************************************/ - -static void 
-set_debug_option(name, val) - char *name; - long val; -{ - RF_DebugName_t *p; - - for (p = rf_debugNames; p->name; p++) { - if (!strcmp(p->name, name)) { - *(p->ptr) = val; - printf("[Set debug variable %s to %ld]\n", name, val); - return; - } - } - RF_ERRORMSG1("Unknown debug string \"%s\"\n", name); -} - - -/* would like to use sscanf here, but apparently not available in kernel */ -/*ARGSUSED*/ -static void -rf_ConfigureDebug(cfgPtr) - RF_Config_t *cfgPtr; -{ - char *val_p, *name_p, *white_p; - long val; - int i; - - rf_ResetDebugOptions(); - for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) { - name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); - white_p = rf_find_white(name_p); /* skip to start of 2nd - * word */ - val_p = rf_find_non_white(white_p); - if (*val_p == '0' && *(val_p + 1) == 'x') - val = rf_htoi(val_p + 2); - else - val = rf_atoi(val_p); - *white_p = '\0'; - set_debug_option(name_p, val); - } -} -/* performance monitoring stuff */ - -#define TIMEVAL_TO_US(t) (((long) t.tv_sec) * 1000000L + (long) t.tv_usec) - -#if !defined(_KERNEL) && !defined(SIMULATE) - -/* - * Throughput stats currently only used in user-level RAIDframe - */ - -static int -rf_InitThroughputStats( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - /* these used by user-level raidframe only */ - rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - raidPtr->throughputstats.sum_io_us = 0; - raidPtr->throughputstats.num_ios = 0; - raidPtr->throughputstats.num_out_ios = 0; - return (0); -} - -void -rf_StartThroughputStats(RF_Raid_t * raidPtr) -{ - RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); - raidPtr->throughputstats.num_ios++; - raidPtr->throughputstats.num_out_ios++; - if (raidPtr->throughputstats.num_out_ios == 1) - RF_GETTIME(raidPtr->throughputstats.start); - 
RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); -} - -static void -rf_StopThroughputStats(RF_Raid_t * raidPtr) -{ - struct timeval diff; - - RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); - raidPtr->throughputstats.num_out_ios--; - if (raidPtr->throughputstats.num_out_ios == 0) { - RF_GETTIME(raidPtr->throughputstats.stop); - RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff); - raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(diff); - } - RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); -} - -static void -rf_PrintThroughputStats(RF_Raid_t * raidPtr) -{ - RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0); - if (raidPtr->throughputstats.sum_io_us != 0) { - printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios - / (raidPtr->throughputstats.sum_io_us / 1000000.0)); - } -} -#endif /* !KERNEL && !SIMULATE */ - -void -rf_StartUserStats(RF_Raid_t * raidPtr) -{ - RF_GETTIME(raidPtr->userstats.start); - raidPtr->userstats.sum_io_us = 0; - raidPtr->userstats.num_ios = 0; - raidPtr->userstats.num_sect_moved = 0; -} - -void -rf_StopUserStats(RF_Raid_t * raidPtr) -{ - RF_GETTIME(raidPtr->userstats.stop); -} - -void -rf_UpdateUserStats(raidPtr, rt, numsect) - RF_Raid_t *raidPtr; - int rt; /* resp time in us */ - int numsect; /* number of sectors for this access */ -{ - raidPtr->userstats.sum_io_us += rt; - raidPtr->userstats.num_ios++; - raidPtr->userstats.num_sect_moved += numsect; -} - -void -rf_PrintUserStats(RF_Raid_t * raidPtr) -{ - long elapsed_us, mbs, mbs_frac; - struct timeval diff; - - RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, &diff); - elapsed_us = TIMEVAL_TO_US(diff); - - /* 2000 sectors per megabyte, 10000000 microseconds per second */ - if (elapsed_us) - mbs = (raidPtr->userstats.num_sect_moved / 2000) / (elapsed_us / 1000000); - else - mbs = 0; - - /* this computes only the first digit of the fractional mb/s moved */ - if (elapsed_us) { - mbs_frac = 
((raidPtr->userstats.num_sect_moved / 200) / (elapsed_us / 1000000)) - - (mbs * 10); - } else { - mbs_frac = 0; - } - - printf("Number of I/Os: %ld\n", raidPtr->userstats.num_ios); - printf("Elapsed time (us): %ld\n", elapsed_us); - printf("User I/Os per second: %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000))); - printf("Average user response time: %ld us\n", RF_DB0_CHECK(raidPtr->userstats.sum_io_us, raidPtr->userstats.num_ios)); - printf("Total sectors moved: %ld\n", raidPtr->userstats.num_sect_moved); - printf("Average access size (sect): %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, raidPtr->userstats.num_ios)); - printf("Achieved data rate: %ld.%ld MB/sec\n", mbs, mbs_frac); -} - - -void -rf_print_panic_message(line,file) - int line; - char *file; -{ - sprintf(rf_panicbuf,"raidframe error at line %d file %s", - line, file); -} - -void -rf_print_assert_panic_message(line,file,condition) - int line; - char *file; - char *condition; -{ - sprintf(rf_panicbuf, - "raidframe error at line %d file %s (failed asserting %s)\n", - line, file, condition); -} diff --git a/sys/dev/raidframe/rf_driver.h b/sys/dev/raidframe/rf_driver.h deleted file mode 100644 index 8b156c5..0000000 --- a/sys/dev/raidframe/rf_driver.h +++ /dev/null @@ -1,79 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_driver.h,v 1.4 2000/02/13 04:53:57 oster Exp $ */ -/* - * rf_driver.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DRIVER_H_ -#define _RF__RF_DRIVER_H_ - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_bsd.h> - -#if _KERNEL -RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex) -int rf_BootRaidframe(void); -int rf_UnbootRaidframe(void); -int rf_Shutdown(RF_Raid_t * raidPtr); -int rf_Configure(RF_Raid_t * raidPtr, RF_Config_t * cfgPtr, - RF_AutoConfig_t *ac); -RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp, RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - void (*cbF) (RF_Buf_t), - void *cbA, - RF_AccessState_t * states); -void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc); -int rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, - caddr_t bufPtr, void *bp_in, RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t ** paramDesc, - void (*cbF) (RF_Buf_t), void *cbA); -int rf_SetReconfiguredMode(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); -int rf_FailDisk(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol, - int initRecon); -void rf_SignalQuiescenceLock(RF_Raid_t * raidPtr, - RF_RaidReconDesc_t * reconDesc); -int rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr); -void rf_ResumeNewRequests(RF_Raid_t * raidPtr); 
-void rf_StartThroughputStats(RF_Raid_t * raidPtr); -void rf_StartUserStats(RF_Raid_t * raidPtr); -void rf_StopUserStats(RF_Raid_t * raidPtr); -void rf_UpdateUserStats(RF_Raid_t * raidPtr, int rt, int numsect); -void rf_PrintUserStats(RF_Raid_t * raidPtr); -#endif /* _KERNEL */ -#endif /* !_RF__RF_DRIVER_H_ */ diff --git a/sys/dev/raidframe/rf_engine.c b/sys/dev/raidframe/rf_engine.c deleted file mode 100644 index d49ec20..0000000 --- a/sys/dev/raidframe/rf_engine.c +++ /dev/null @@ -1,812 +0,0 @@ -/* $NetBSD: rf_engine.c,v 1.10 2000/08/20 16:51:03 thorpej Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II, Mark Holland, Rachad Youssef - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/**************************************************************************** - * * - * engine.c -- code for DAG execution engine * - * * - * Modified to work as follows (holland): * - * A user-thread calls into DispatchDAG, which fires off the nodes that * - * are direct successors to the header node. DispatchDAG then returns, * - * and the rest of the I/O continues asynchronously. As each node * - * completes, the node execution function calls FinishNode(). FinishNode * - * scans the list of successors to the node and increments the antecedent * - * counts. Each node that becomes enabled is placed on a central node * - * queue. A dedicated dag-execution thread grabs nodes off of this * - * queue and fires them. * - * * - * NULL nodes are never fired. * - * * - * Terminator nodes are never fired, but rather cause the callback * - * associated with the DAG to be invoked. * - * * - * If a node fails, the dag either rolls forward to the completion or * - * rolls back, undoing previously-completed nodes and fails atomically. * - * The direction of recovery is determined by the location of the failed * - * node in the graph. If the failure occured before the commit node in * - * the graph, backward recovery is used. Otherwise, forward recovery is * - * used. 
* - * * - ****************************************************************************/ - -#include <dev/raidframe/rf_threadstuff.h> - -#include <sys/errno.h> - -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_kintf.h> - -static void DAGExecutionThread(RF_ThreadArg_t arg); - -#define DO_INIT(_l_,_r_) { \ - int _rc; \ - _rc = rf_create_managed_mutex(_l_,&(_r_)->node_queue_mutex); \ - if (_rc) { \ - return(_rc); \ - } \ - _rc = rf_create_managed_cond(_l_,&(_r_)->node_queue_cond); \ - if (_rc) { \ - return(_rc); \ - } \ -} - -/* synchronization primitives for this file. DO_WAIT should be enclosed in a while loop. */ - -/* - * XXX Is this spl-ing really necessary? - */ -#define DO_LOCK(_r_) \ -do { \ - ks = splbio(); \ - RF_LOCK_MUTEX((_r_)->node_queue_mutex); \ -} while (0) - -#define DO_UNLOCK(_r_) \ -do { \ - RF_UNLOCK_MUTEX((_r_)->node_queue_mutex); \ - splx(ks); \ -} while (0) - -#define DO_WAIT(_r_) \ - RF_WAIT_COND((_r_)->node_queue, (_r_)->node_queue_mutex) - -#define DO_SIGNAL(_r_) \ - RF_BROADCAST_COND((_r_)->node_queue) /* XXX RF_SIGNAL_COND? */ - -static void rf_ShutdownEngine(void *); - -static void -rf_ShutdownEngine(arg) - void *arg; -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - raidPtr->shutdown_engine = 1; - DO_SIGNAL(raidPtr); -} - -int -rf_ConfigureEngine( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - DO_INIT(listp, raidPtr); - - raidPtr->node_queue = NULL; - raidPtr->dags_in_flight = 0; - - rc = rf_init_managed_threadgroup(listp, &raidPtr->engine_tg); - if (rc) - return (rc); - - /* we create the execution thread only once per system boot. no need - * to check return code b/c the kernel panics if it can't create the - * thread. 
*/ - if (rf_engineDebug) { - printf("raid%d: Creating engine thread\n", raidPtr->raidid); - } - if (RF_CREATE_THREAD(raidPtr->engine_thread, DAGExecutionThread, raidPtr,"raid")) { - RF_ERRORMSG("RAIDFRAME: Unable to create engine thread\n"); - return (ENOMEM); - } - if (rf_engineDebug) { - printf("raid%d: Created engine thread\n", raidPtr->raidid); - } - RF_THREADGROUP_STARTED(&raidPtr->engine_tg); - /* XXX something is missing here... */ -#ifdef debug - printf("Skipping the WAIT_START!!\n"); -#endif -#if 1 - printf("Waiting for DAG engine to start\n"); - RF_THREADGROUP_WAIT_START(&raidPtr->engine_tg); -#endif - /* engine thread is now running and waiting for work */ - if (rf_engineDebug) { - printf("raid%d: Engine thread running and waiting for events\n", raidPtr->raidid); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownEngine, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownEngine(NULL); - } - return (rc); -} - -static int -BranchDone(RF_DagNode_t * node) -{ - int i; - - /* return true if forward execution is completed for a node and it's - * succedents */ - switch (node->status) { - case rf_wait: - /* should never be called in this state */ - RF_PANIC(); - break; - case rf_fired: - /* node is currently executing, so we're not done */ - return (RF_FALSE); - case rf_good: - for (i = 0; i < node->numSuccedents; i++) /* for each succedent */ - if (!BranchDone(node->succedents[i])) /* recursively check - * branch */ - return RF_FALSE; - return RF_TRUE; /* node and all succedent branches aren't in - * fired state */ - break; - case rf_bad: - /* succedents can't fire */ - return (RF_TRUE); - case rf_recover: - /* should never be called in this state */ - RF_PANIC(); - break; - case rf_undone: - case rf_panic: - /* XXX need to fix this case */ - /* for now, assume that we're done */ - return (RF_TRUE); - break; - default: - /* illegal node status */ - RF_PANIC(); - break; - } -} - 
-static int -NodeReady(RF_DagNode_t * node) -{ - int ready; - - switch (node->dagHdr->status) { - case rf_enable: - case rf_rollForward: - if ((node->status == rf_wait) && (node->numAntecedents == node->numAntDone)) - ready = RF_TRUE; - else - ready = RF_FALSE; - break; - case rf_rollBackward: - RF_ASSERT(node->numSuccDone <= node->numSuccedents); - RF_ASSERT(node->numSuccFired <= node->numSuccedents); - RF_ASSERT(node->numSuccFired <= node->numSuccDone); - if ((node->status == rf_good) && (node->numSuccDone == node->numSuccedents)) - ready = RF_TRUE; - else - ready = RF_FALSE; - break; - default: - printf("Execution engine found illegal DAG status in NodeReady\n"); - RF_PANIC(); - break; - } - - return (ready); -} - - - -/* user context and dag-exec-thread context: - * Fire a node. The node's status field determines which function, do or undo, - * to be fired. - * This routine assumes that the node's status field has alread been set to - * "fired" or "recover" to indicate the direction of execution. 
- */ -static void -FireNode(RF_DagNode_t * node) -{ - switch (node->status) { - case rf_fired: - /* fire the do function of a node */ - if (rf_engineDebug) { - printf("raid%d: Firing node 0x%lx (%s)\n", - node->dagHdr->raidPtr->raidid, - (unsigned long) node, node->name); - } - if (node->flags & RF_DAGNODE_FLAG_YIELD) { -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) - /* thread_block(); */ - /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - * /usr/include/vm/vm_extern.h */ -#else - thread_block(); -#endif - } - (*(node->doFunc)) (node); - break; - case rf_recover: - /* fire the undo function of a node */ - if (rf_engineDebug) { - printf("raid%d: Firing (undo) node 0x%lx (%s)\n", - node->dagHdr->raidPtr->raidid, - (unsigned long) node, node->name); - } - if (node->flags & RF_DAGNODE_FLAG_YIELD) -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) - /* thread_block(); */ - /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - * /usr/include/vm/vm_extern.h */ -#else - thread_block(); -#endif - (*(node->undoFunc)) (node); - break; - default: - RF_PANIC(); - break; - } -} - - - -/* user context: - * Attempt to fire each node in a linear array. - * The entire list is fired atomically. 
- */ -static void -FireNodeArray( - int numNodes, - RF_DagNode_t ** nodeList) -{ - RF_DagStatus_t dstat; - RF_DagNode_t *node; - int i, j; - - /* first, mark all nodes which are ready to be fired */ - for (i = 0; i < numNodes; i++) { - node = nodeList[i]; - dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { - if ((dstat == rf_enable) || (dstat == rf_rollForward)) { - RF_ASSERT(node->status == rf_wait); - if (node->commitNode) - node->dagHdr->numCommits++; - node->status = rf_fired; - for (j = 0; j < node->numAntecedents; j++) - node->antecedents[j]->numSuccFired++; - } else { - RF_ASSERT(dstat == rf_rollBackward); - RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node - * per graph */ - node->status = rf_recover; - } - } - } - /* now, fire the nodes */ - for (i = 0; i < numNodes; i++) { - if ((nodeList[i]->status == rf_fired) || (nodeList[i]->status == rf_recover)) - FireNode(nodeList[i]); - } -} - - -/* user context: - * Attempt to fire each node in a linked list. - * The entire list is fired atomically. 
- */ -static void -FireNodeList(RF_DagNode_t * nodeList) -{ - RF_DagNode_t *node, *next; - RF_DagStatus_t dstat; - int j; - - if (nodeList) { - /* first, mark all nodes which are ready to be fired */ - for (node = nodeList; node; node = next) { - next = node->next; - dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { - if ((dstat == rf_enable) || (dstat == rf_rollForward)) { - RF_ASSERT(node->status == rf_wait); - if (node->commitNode) - node->dagHdr->numCommits++; - node->status = rf_fired; - for (j = 0; j < node->numAntecedents; j++) - node->antecedents[j]->numSuccFired++; - } else { - RF_ASSERT(dstat == rf_rollBackward); - RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node - * per graph */ - node->status = rf_recover; - } - } - } - /* now, fire the nodes */ - for (node = nodeList; node; node = next) { - next = node->next; - if ((node->status == rf_fired) || (node->status == rf_recover)) - FireNode(node); - } - } -} -/* interrupt context: - * for each succedent - * propagate required results from node to succedent - * increment succedent's numAntDone - * place newly-enable nodes on node queue for firing - * - * To save context switches, we don't place NIL nodes on the node queue, - * but rather just process them as if they had fired. Note that NIL nodes - * that are the direct successors of the header will actually get fired by - * DispatchDAG, which is fine because no context switches are involved. - * - * Important: when running at user level, this can be called by any - * disk thread, and so the increment and check of the antecedent count - * must be locked. I used the node queue mutex and locked down the - * entire function, but this is certainly overkill. 
- */ -static void -PropagateResults( - RF_DagNode_t * node, - int context) -{ - RF_DagNode_t *s, *a; - RF_Raid_t *raidPtr; - int i, ks; - RF_DagNode_t *finishlist = NULL; /* a list of NIL nodes to be - * finished */ - RF_DagNode_t *skiplist = NULL; /* list of nodes with failed truedata - * antecedents */ - RF_DagNode_t *firelist = NULL; /* a list of nodes to be fired */ - RF_DagNode_t *q = NULL, *qh = NULL, *next; - int j, skipNode; - - raidPtr = node->dagHdr->raidPtr; - - DO_LOCK(raidPtr); - - /* debug - validate fire counts */ - for (i = 0; i < node->numAntecedents; i++) { - a = *(node->antecedents + i); - RF_ASSERT(a->numSuccFired >= a->numSuccDone); - RF_ASSERT(a->numSuccFired <= a->numSuccedents); - a->numSuccDone++; - } - - switch (node->dagHdr->status) { - case rf_enable: - case rf_rollForward: - for (i = 0; i < node->numSuccedents; i++) { - s = *(node->succedents + i); - RF_ASSERT(s->status == rf_wait); - (s->numAntDone)++; - if (s->numAntDone == s->numAntecedents) { - /* look for NIL nodes */ - if (s->doFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process - * them */ - s->next = finishlist; - finishlist = s; - } else { - /* look to see if the node is to be - * skipped */ - skipNode = RF_FALSE; - for (j = 0; j < s->numAntecedents; j++) - if ((s->antType[j] == rf_trueData) && (s->antecedents[j]->status == rf_bad)) - skipNode = RF_TRUE; - if (skipNode) { - /* this node has one or more - * failed true data - * dependencies, so skip it */ - s->next = skiplist; - skiplist = s; - } else - /* add s to list of nodes (q) - * to execute */ - if (context != RF_INTR_CONTEXT) { - /* we only have to - * enqueue if we're at - * intr context */ - s->next = firelist; /* put node on a list to - * be fired after we - * unlock */ - firelist = s; - } else { /* enqueue the node for - * the dag exec thread - * to fire */ - RF_ASSERT(NodeReady(s)); - if (q) { - q->next = s; - q = s; - } else { - qh = q = s; - qh->next = NULL; - } - } - } - } - } - - if (q) { - /* 
xfer our local list of nodes to the node queue */ - q->next = raidPtr->node_queue; - raidPtr->node_queue = qh; - DO_SIGNAL(raidPtr); - } - DO_UNLOCK(raidPtr); - - for (; skiplist; skiplist = next) { - next = skiplist->next; - skiplist->status = rf_skipped; - for (i = 0; i < skiplist->numAntecedents; i++) { - skiplist->antecedents[i]->numSuccFired++; - } - if (skiplist->commitNode) { - skiplist->dagHdr->numCommits++; - } - rf_FinishNode(skiplist, context); - } - for (; finishlist; finishlist = next) { - /* NIL nodes: no need to fire them */ - next = finishlist->next; - finishlist->status = rf_good; - for (i = 0; i < finishlist->numAntecedents; i++) { - finishlist->antecedents[i]->numSuccFired++; - } - if (finishlist->commitNode) - finishlist->dagHdr->numCommits++; - /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the terminal node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. 
- */ - rf_FinishNode(finishlist, context); /* recursive call */ - } - /* fire all nodes in firelist */ - FireNodeList(firelist); - break; - - case rf_rollBackward: - for (i = 0; i < node->numAntecedents; i++) { - a = *(node->antecedents + i); - RF_ASSERT(a->status == rf_good); - RF_ASSERT(a->numSuccDone <= a->numSuccedents); - RF_ASSERT(a->numSuccDone <= a->numSuccFired); - - if (a->numSuccDone == a->numSuccFired) { - if (a->undoFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process - * them */ - a->next = finishlist; - finishlist = a; - } else { - if (context != RF_INTR_CONTEXT) { - /* we only have to enqueue if - * we're at intr context */ - a->next = firelist; /* put node on a list to - * be fired after we - * unlock */ - firelist = a; - } else { /* enqueue the node for - * the dag exec thread - * to fire */ - RF_ASSERT(NodeReady(a)); - if (q) { - q->next = a; - q = a; - } else { - qh = q = a; - qh->next = NULL; - } - } - } - } - } - if (q) { - /* xfer our local list of nodes to the node queue */ - q->next = raidPtr->node_queue; - raidPtr->node_queue = qh; - DO_SIGNAL(raidPtr); - } - DO_UNLOCK(raidPtr); - for (; finishlist; finishlist = next) { /* NIL nodes: no need to - * fire them */ - next = finishlist->next; - finishlist->status = rf_good; - /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the first node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. 
- */ - rf_FinishNode(finishlist, context); /* recursive call */ - } - /* fire all nodes in firelist */ - FireNodeList(firelist); - - break; - default: - printf("Engine found illegal DAG status in PropagateResults()\n"); - RF_PANIC(); - break; - } -} - - - -/* - * Process a fired node which has completed - */ -static void -ProcessNode( - RF_DagNode_t * node, - int context) -{ - RF_Raid_t *raidPtr; - - raidPtr = node->dagHdr->raidPtr; - - switch (node->status) { - case rf_good: - /* normal case, don't need to do anything */ - break; - case rf_bad: - if ((node->dagHdr->numCommits > 0) || (node->dagHdr->numCommitNodes == 0)) { - node->dagHdr->status = rf_rollForward; /* crossed commit - * barrier */ - if (rf_engineDebug || 1) { - printf("raid%d: node (%s) returned fail, rolling forward\n", raidPtr->raidid, node->name); - } - } else { - node->dagHdr->status = rf_rollBackward; /* never reached commit - * barrier */ - if (rf_engineDebug || 1) { - printf("raid%d: node (%s) returned fail, rolling backward\n", raidPtr->raidid, node->name); - } - } - break; - case rf_undone: - /* normal rollBackward case, don't need to do anything */ - break; - case rf_panic: - /* an undo node failed!!! */ - printf("UNDO of a node failed!!!/n"); - break; - default: - printf("node finished execution with an illegal status!!!\n"); - RF_PANIC(); - break; - } - - /* enqueue node's succedents (antecedents if rollBackward) for - * execution */ - PropagateResults(node, context); -} - - - -/* user context or dag-exec-thread context: - * This is the first step in post-processing a newly-completed node. - * This routine is called by each node execution function to mark the node - * as complete and fire off any successors that have been enabled. 
- */ -int -rf_FinishNode( - RF_DagNode_t * node, - int context) -{ - /* as far as I can tell, retcode is not used -wvcii */ - int retcode = RF_FALSE; - node->dagHdr->numNodesCompleted++; - ProcessNode(node, context); - - return (retcode); -} - - -/* user context: - * submit dag for execution, return non-zero if we have to wait for completion. - * if and only if we return non-zero, we'll cause cbFunc to get invoked with - * cbArg when the DAG has completed. - * - * for now we always return 1. If the DAG does not cause any I/O, then the callback - * may get invoked before DispatchDAG returns. There's code in state 5 of ContinueRaidAccess - * to handle this. - * - * All we do here is fire the direct successors of the header node. The - * DAG execution thread does the rest of the dag processing. - */ -int -rf_DispatchDAG( - RF_DagHeader_t * dag, - void (*cbFunc) (void *), - void *cbArg) -{ - RF_Raid_t *raidPtr; - - raidPtr = dag->raidPtr; - if (dag->tracerec) { - RF_ETIMER_START(dag->tracerec->timer); - } - if (rf_engineDebug || rf_validateDAGDebug) { - if (rf_ValidateDAG(dag)) - RF_PANIC(); - } - if (rf_engineDebug) { - printf("raid%d: Entering DispatchDAG\n", raidPtr->raidid); - } - raidPtr->dags_in_flight++; /* debug only: blow off proper - * locking */ - dag->cbFunc = cbFunc; - dag->cbArg = cbArg; - dag->numNodesCompleted = 0; - dag->status = rf_enable; - FireNodeArray(dag->numSuccedents, dag->succedents); - return (1); -} -/* dedicated kernel thread: - * the thread that handles all DAG node firing. - * To minimize locking and unlocking, we grab a copy of the entire node queue and then set the - * node queue to NULL before doing any firing of nodes. This way we only have to release the - * lock once. Of course, it's probably rare that there's more than one node in the queue at - * any one time, but it sometimes happens. - * - * In the kernel, this thread runs at spl0 and is not swappable. I copied these - * characteristics from the aio_completion_thread. 
- */ - -static void -DAGExecutionThread(RF_ThreadArg_t arg) -{ - RF_DagNode_t *nd, *local_nq, *term_nq, *fire_nq; - RF_Raid_t *raidPtr; - int ks; - - raidPtr = (RF_Raid_t *) arg; - - if (rf_engineDebug) { - printf("raid%d: Engine thread is running\n", raidPtr->raidid); - } - - mtx_lock(&Giant); - - RF_THREADGROUP_RUNNING(&raidPtr->engine_tg); - - DO_LOCK(raidPtr); - while (!raidPtr->shutdown_engine) { - - while (raidPtr->node_queue != NULL) { - local_nq = raidPtr->node_queue; - fire_nq = NULL; - term_nq = NULL; - raidPtr->node_queue = NULL; - DO_UNLOCK(raidPtr); - - /* first, strip out the terminal nodes */ - while (local_nq) { - nd = local_nq; - local_nq = local_nq->next; - switch (nd->dagHdr->status) { - case rf_enable: - case rf_rollForward: - if (nd->numSuccedents == 0) { - /* end of the dag, add to - * callback list */ - nd->next = term_nq; - term_nq = nd; - } else { - /* not the end, add to the - * fire queue */ - nd->next = fire_nq; - fire_nq = nd; - } - break; - case rf_rollBackward: - if (nd->numAntecedents == 0) { - /* end of the dag, add to the - * callback list */ - nd->next = term_nq; - term_nq = nd; - } else { - /* not the end, add to the - * fire queue */ - nd->next = fire_nq; - fire_nq = nd; - } - break; - default: - RF_PANIC(); - break; - } - } - - /* execute callback of dags which have reached the - * terminal node */ - while (term_nq) { - nd = term_nq; - term_nq = term_nq->next; - nd->next = NULL; - (nd->dagHdr->cbFunc) (nd->dagHdr->cbArg); - raidPtr->dags_in_flight--; /* debug only */ - } - - /* fire remaining nodes */ - FireNodeList(fire_nq); - - DO_LOCK(raidPtr); - } - while (!raidPtr->shutdown_engine && raidPtr->node_queue == NULL) - DO_WAIT(raidPtr); - } - DO_UNLOCK(raidPtr); - - RF_THREADGROUP_DONE(&raidPtr->engine_tg); - - RF_THREAD_EXIT(0); -} diff --git a/sys/dev/raidframe/rf_engine.h b/sys/dev/raidframe/rf_engine.h deleted file mode 100644 index c758c05..0000000 --- a/sys/dev/raidframe/rf_engine.h +++ /dev/null @@ -1,48 +0,0 @@ -/* 
$FreeBSD$ */ -/* $NetBSD: rf_engine.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II, Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/********************************************************** - * * - * engine.h -- header file for execution engine functions * - * * - **********************************************************/ - -#ifndef _RF__RF_ENGINE_H_ -#define _RF__RF_ENGINE_H_ - -int -rf_ConfigureEngine(RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); - -int rf_FinishNode(RF_DagNode_t * node, int context); /* return finished node - * to engine */ - -int rf_DispatchDAG(RF_DagHeader_t * dag, void (*cbFunc) (void *), void *cbArg); /* execute dag */ - -#endif /* !_RF__RF_ENGINE_H_ */ diff --git a/sys/dev/raidframe/rf_etimer.h b/sys/dev/raidframe/rf_etimer.h deleted file mode 100644 index e66e01b..0000000 --- a/sys/dev/raidframe/rf_etimer.h +++ /dev/null @@ -1,95 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_etimer.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#ifndef _RF__RF_TIMER_H_ -#define _RF__RF_TIMER_H_ - -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_utils.h> - -#include <sys/time.h> - -struct RF_Etimer_s { - struct timeval st; - struct timeval et; - struct timeval diff; -}; - -#if defined(_KERNEL) -#include <sys/kernel.h> - -#if defined(__NetBSD__) -#define RF_ETIMER_START(_t_) \ - { \ - int s; \ - bzero(&(_t_), sizeof (_t_)); \ - s = splclock(); \ - (_t_).st = mono_time; \ - splx(s); \ - } -#elif defined(__FreeBSD__) -#define RF_ETIMER_START(_t_) \ - { \ - int s; \ - bzero(&(_t_), sizeof (_t_)); \ - s = splclock(); \ - getmicrouptime(&(_t_).st); \ - splx(s); \ - } -#endif - -#if defined(__NetBSD__) -#define RF_ETIMER_STOP(_t_) \ - { \ - int s; \ - s = splclock(); \ - (_t_).et = mono_time; \ - splx(s); \ - } -#elif defined(__FreeBSD__) -#define RF_ETIMER_STOP(_t_) \ - { \ - int s; \ - s = splclock(); \ - getmicrouptime(&(_t_).et); \ - splx(s); \ - } -#endif - -#define RF_ETIMER_EVAL(_t_) \ - { \ - RF_TIMEVAL_DIFF(&(_t_).st, &(_t_).et, &(_t_).diff) \ - } - -#define RF_ETIMER_VAL_US(_t_) (RF_TIMEVAL_TO_US((_t_).diff)) -#define RF_ETIMER_VAL_MS(_t_) (RF_TIMEVAL_TO_US((_t_).diff)/1000) - -#endif /* _KERNEL */ - -#endif /* !_RF__RF_TIMER_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd.c b/sys/dev/raidframe/rf_evenodd.c deleted file mode 100644 index 334ba0b..0000000 --- a/sys/dev/raidframe/rf_evenodd.c +++ /dev/null @@ -1,559 +0,0 @@ -/* $NetBSD: rf_evenodd.c,v 1.4 2000/01/07 03:40:59 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * rf_evenodd.c -- implements EVENODD array architecture - * - ****************************************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_EVENODD > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_pq.h> -#include <dev/raidframe/rf_mcpair.h> -#include 
<dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_evenodd_dagfuncs.h> -#include <dev/raidframe/rf_evenodd_dags.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_kintf.h> - -typedef struct RF_EvenOddConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_EvenOddConfigInfo_t; - -int -rf_ConfigureEvenOdd(listp, raidPtr, cfgPtr) - RF_ShutdownList_t **listp; - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_EvenOddConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - startdisk = 0; - for (i = 0; i < raidPtr->numCol; i++) { - for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((startdisk -= 2) < 0) - startdisk += raidPtr->numCol; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 2; /* ORIG: - * layoutPtr->numDataCol - * = raidPtr->numCol-1; */ -#if RF_EO_MATRIX_DIM > 17 - if (raidPtr->numCol <= 17) { - printf("Number of stripe units in a parity stripe is smaller than 17. Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 17 to increase performance. \n"); - return (EINVAL); - } -#elif RF_EO_MATRIX_DIM == 17 - if (raidPtr->numCol > 17) { - printf("Number of stripe units in a parity stripe is bigger than 17. 
Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 257 for encoding and decoding functions to work. \n"); - return (EINVAL); - } -#endif - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 2; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr) -{ - return (10); -} - -void -rf_IdentifyStripeEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_EvenOddConfigInfo_t *info = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; -} -/* The layout of stripe unit on the disks are: c0 c1 c2 c3 c4 - - 0 1 2 E P - 5 E P 3 4 - P 6 7 8 E - 10 11 E P 9 - E P 12 13 14 - .... - - We use the MapSectorRAID5 to map data information because the routine can be shown to map exactly - the layout of data stripe unit as shown above although we have 2 redundant information now. - But for E and P, we use rf_MapEEvenOdd and rf_MapParityEvenOdd which are different method from raid-5. 
-*/ - - -void -rf_MapParityEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; - - *row = 0; - *col = (endSUIDofthisStrip + 2) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapEEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; - - *row = 0; - *col = (endSUIDofthisStrip + 1) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_EODagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - unsigned ndfail = asmap->numDataFailed; - unsigned npfail = asmap->numParityFailed + asmap->numQFailed; - unsigned ntfail = npfail + ndfail; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - if (ntfail > 2) { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } - /* ok, we can do this I/O */ - if (type == RF_IO_TYPE_READ) { - switch (ndfail) { - case 0: - /* fault free read */ - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ - break; - case 1: - /* lost a single data unit */ - /* two cases: (1) parity is not lost. 
do a normal raid - * 5 reconstruct read. (2) parity is lost. do a - * reconstruct read using "e". */ - if (ntfail == 2) { /* also lost redundancy */ - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateReadDAG; - } else { - /* P and E are ok. But is there a failure in - * some unaccessed data unit? */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateReadDAG; - } - break; - case 2: - /* *createFunc = rf_EO_200_CreateReadDAG; */ - *createFunc = NULL; - break; - } - return; - } - /* a write */ - switch (ntfail) { - case 0: /* fault free */ - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - - *createFunc = (RF_VoidFuncPtr) rf_EOCreateSmallWriteDAG; - } else { - *createFunc = (RF_VoidFuncPtr) rf_EOCreateLargeWriteDAG; - } - break; - - case 1: /* single disk fault */ - if (npfail == 1) { - RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like - * normal mode raid5 - * write. 
*/ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->parityInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateLargeWriteDAG; - } else {/* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->qInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateLargeWriteDAG; - } - } else { /* data missing. Do a P reconstruct write if - * only a single data unit is lost in the - * stripe, otherwise a reconstruct write which - * employnig both P and E units. */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) { - if (asmap->numStripeUnitsAccessed == 1) - *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateWriteDAG; - else - *createFunc = NULL; /* No direct support for - * this case now, like - * that in Raid-5 */ - } else { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for - * this case now, like - * that in Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateWriteDAG; - } - } - break; - - case 2: /* two disk faults */ - switch (npfail) { - case 2: /* both p and q dead */ - *createFunc = (RF_VoidFuncPtr) rf_EO_011_CreateWriteDAG; - break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != 
layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* In both PQ and - * EvenOdd, no direct - * support for this case - * now, like that in - * Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateWriteDAG; - } else { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for - * this case, like that - * in Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateWriteDAG; - } - break; - case 0: /* double data loss */ - /* if(asmap->failedPDAs[0]->numSector + - * asmap->failedPDAs[1]->numSector == 2 * - * layoutPtr->sectorsPerStripeUnit ) createFunc = - * rf_EOCreateLargeWriteDAG; else */ - *createFunc = NULL; /* currently, in Evenodd, No - * support for simultaneous - * access of both failed SUs */ - break; - } - break; - - default: /* more than 2 disk faults */ - *createFunc = NULL; - RF_PANIC(); - } - return; -} - - -int -rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ - RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; - RF_AccessStripeMapHeader_t *asm_h; - RF_AccessStripeMap_t *asmap; - RF_AllocListElem_t *alloclist; - RF_PhysDiskAddr_t *pda; - char *pbuf, *buf, *end_p, *p; - char *redundantbuf2; - int redundantTwoErr = 0, redundantOneErr = 0; - int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, - parity_corrected = RF_FALSE, red2_corrected = RF_FALSE; - int i, retcode; - RF_ReconUnitNum_t which_ru; 
- RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - RF_AccTraceEntry_t tracerec; - RF_MCPair_t *mcpair; - - retcode = RF_PARITY_OKAY; - - mcpair = rf_AllocMCPair(); - rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ - end_p = buf + bytesPerStripe; - RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); - asmap = asm_h->stripeMap; - - for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) - goto out; /* no way to verify parity if disk is - * dead. 
return w/ good status */ - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - RF_ASSERT(!asmap->parityInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); - RF_ASSERT(asmap->parityInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) - goto out; - blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; - - RF_ASSERT(!asmap->qInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1); - RF_ASSERT(asmap->qInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1)) - goto out; - /* if disk is dead, b/c no reconstruction is implemented right now, - * the function "rf_TryToRedirectPDA" always return one, which cause - * go to out and return w/ good status */ - blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = asmap->qInfo; - - /* fire off the DAG */ - bzero((char *) &tracerec, sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug) { - printf("Parity verify read dag:\n"); - rf_PrintDAGList(rd_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); - retcode = RF_PARITY_COULD_NOT_VERIFY; - goto out; - } - for (p = buf, i = 0; p < end_p; p += numbytes, i++) { - rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector); - /* the corresponding columes in EvenOdd encoding Matrix for - * these p pointers which point to the databuffer in a full - * stripe are sequentially from 0 to layoutPtr->numDataCol-1 */ - rf_bxor(p, pbuf, numbytes, NULL); - } - RF_ASSERT(i == 
layoutPtr->numDataCol); - - for (i = 0; i < numbytes; i++) { - if (pbuf[i] != buf[bytesPerStripe + i]) { - if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); - } - } - redundantOneErr = 1; - break; - } - - for (i = 0; i < numbytes; i++) { - if (redundantbuf2[i] != buf[bytesPerStripe + numbytes + i]) { - if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + numbytes + i], (u_char) redundantbuf2[i]); - } - redundantTwoErr = 1; - break; - } - } - if (redundantOneErr || redundantTwoErr) - retcode = RF_PARITY_BAD; - - /* correct the first redundant disk, ie parity if it is error */ - if (redundantOneErr && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; - wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->parityInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); - parity_cant_correct = RF_TRUE; - } else { - parity_corrected = RF_TRUE; - } - rf_FreeDAG(wr_dag_h); - } - if (redundantTwoErr && correct_it) { - wr_dag_h = 
rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; - wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->qInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Dag of write new second redundant information in parity verify :\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n"); - red2_cant_correct = RF_TRUE; - } else { - red2_corrected = RF_TRUE; - } - rf_FreeDAG(wr_dag_h); - } - if ((redundantOneErr && parity_cant_correct) || - (redundantTwoErr && red2_cant_correct)) - retcode = RF_PARITY_COULD_NOT_CORRECT; - if ((retcode = RF_PARITY_BAD) && parity_corrected && red2_corrected) - retcode = RF_PARITY_CORRECTED; - - -out: - rf_FreeAccessStripeMap(asm_h); - rf_FreeAllocList(alloclist); - rf_FreeDAG(rd_dag_h); - rf_FreeMCPair(mcpair); - return (retcode); -} -#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd.h b/sys/dev/raidframe/rf_evenodd.h deleted file mode 100644 index 4babdec..0000000 --- a/sys/dev/raidframe/rf_evenodd.h +++ /dev/null @@ -1,55 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_evenodd.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ -/* - * Copyright (c) 1995, 1996 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_EVENODD_H_ -#define _RF__RF_EVENODD_H_ - -/* extern declerations of the failure mode functions. 
*/ -int -rf_ConfigureEvenOdd(RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr); -void -rf_IdentifyStripeEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outrow); -void -rf_MapParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapEEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_EODagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); - -#endif /* !_RF__RF_EVENODD_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.c b/sys/dev/raidframe/rf_evenodd_dagfuncs.c deleted file mode 100644 index 2e39a53..0000000 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.c +++ /dev/null @@ -1,977 +0,0 @@ -/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.7 2001/01/26 03:50:53 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: ChangMing Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Code for RAID-EVENODD architecture. - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_EVENODD > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_evenodd_dagfuncs.h> - -/* These redundant functions are for small write */ -RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"}; -RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"}; -/* These redundant functions are for degraded read */ -RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"}; -RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"}; -/********************************************************************************************** - * the following encoding node functions is used in EO_000_CreateLargeWriteDAG - 
**********************************************************************************************/ -int -rf_RegularPEFunc(node) - RF_DagNode_t *node; -{ - rf_RegularESubroutine(node, node->results[1]); - rf_RegularXorFunc(node);/* does the wakeup here! */ -#if 1 - return (0); /* XXX This was missing... GO */ -#endif -} - - -/************************************************************************************************ - * For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to - * be used. The previous case is when write access at least sectors of full stripe unit. - * The later function is used when the write access two stripe units but with total sectors - * less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected - * areas in their stripe unit and parity write and 'E' write are both devided into two distinct - * writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5 - ************************************************************************************************/ - -/* Algorithm: - 1. Store the difference of old data and new data in the Rod buffer. - 2. then encode this buffer into the buffer which already have old 'E' information inside it, - the result can be shown to be the new 'E' information. - 3. xor the Wnd buffer into the difference buffer to recover the original old data. - Here we have another alternative: to allocate a temporary buffer for storing the difference of - old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach - take the same speed as the previous, and need more memory. 
-*/ -int -rf_RegularONEFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - int EpdaIndex = (node->numParams - 1) / 2 - 1; /* the parameter of node - * where you can find - * e-pda */ - int i, k, retcode = 0; - int suoffset, length; - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p; - int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */ - - RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q); - RF_ASSERT(ESUOffset == 0); - - RF_ETIMER_START(timer); - - /* Xor the Wnd buffer into Rod buffer, the difference of old data and - * new data is stored in Rod buffer */ - for (k = 0; k < EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); - retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp); - } - /* Start to encoding the buffer storing the difference of old data and - * new data into 'E' buffer */ - for (i = 0; i < EpdaIndex; i += 2) - if (node->params[i + 1].p != node->results[0]) { /* results[0] is buf ptr - * of E */ - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i + 1].p; - scol = rf_EUCol(layoutPtr, pda->raidAddress); - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - /* Recover the original old data to be used by parity encoding - * function in XorNode */ - for (k = 0; k < EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); - retcode = rf_bxor(node->params[k + EpdaIndex 
+ 3].p, node->params[k + 1].p, length, node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX this was missing.. GO */ -#endif -} - -int -rf_SimpleONEFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - int retcode = 0; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - int length; - RF_RowCol_t scol; - RF_Etimer_t timer; - - RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q); - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); /* this is a pda of - * writeDataNodes */ - /* bxor to buffer of readDataNodes */ - retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); - /* find out the corresponding colume in encoding matrix for - * write colume to be encoded into redundant disk 'E' */ - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = node->params[1].p; - destbuf = node->params[3].p; - /* Start encoding process */ - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - - } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ -} - - -/****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. 
large write ********/ -void -rf_RegularESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *pda; - int i, suoffset; - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) { - RF_ASSERT(node->params[i + 1].p != ebuf); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = (char *) node->params[i + 1].p; - destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); -} - - -/******************************************************************************************* - * Used in EO_001_CreateLargeWriteDAG - ******************************************************************************************/ -int -rf_RegularEFunc(node) - RF_DagNode_t *node; -{ - rf_RegularESubroutine(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX this was missing?.. GO */ -#endif -} -/******************************************************************************************* - * This degraded function allow only two case: - * 1. when write access the full failed stripe unit, then the access can be more than - * one tripe units. - * 2. when write access only part of the failed SU, we assume accesses of more than - * one stripe unit is not allowed so that the write can be dealt with like a - * large write. - * The following function is based on these assumptions. So except in the second case, - * it looks the same as a large write encodeing function. 
But this is not exactly the - * normal way for doing a degraded write, since raidframe have to break cases of access - * other than the above two into smaller accesses. We may have to change - * DegrESubroutin in the future. - *******************************************************************************************/ -void -rf_DegrESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - RF_PhysDiskAddr_t *pda; - int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) { - RF_ASSERT(node->params[i + 1].p != ebuf); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = (char *) node->params[i + 1].p; - destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} - - -/************************************************************************************** - * This function is used in case where one data disk failed and both redundant disks - * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk - * failed in the stripe but not accessed at this time, then we should, instead, use - * the rf_EOWriteDoubleRecoveryFunc(). 
- **************************************************************************************/ -int -rf_Degraded_100_EOFunc(node) - RF_DagNode_t *node; -{ - rf_DegrESubroutine(node, node->results[1]); - rf_RecoveryXorFunc(node); /* does the wakeup here! */ -#if 1 - return (0); /* XXX this was missing... SHould these be - * void functions??? GO */ -#endif -} -/************************************************************************************** - * This function is to encode one sector in one of the data disks to the E disk. - * However, in evenodd this function can also be used as decoding function to recover - * data from dead disk in the case of parity failure and a single data failure. - **************************************************************************************/ -void -rf_e_EncOneSect( - RF_RowCol_t srcLogicCol, - char *srcSecbuf, - RF_RowCol_t destLogicCol, - char *destSecbuf, - int bytesPerSector) -{ - int S_index; /* index of the EU in the src col which need - * be Xored into all EUs in a dest sector */ - int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; - RF_RowCol_t j, indexInDest, /* row index of an encoding unit in - * the destination colume of encoding - * matrix */ - indexInSrc; /* row index of an encoding unit in the source - * colume used for recovery */ - int bytesPerEU = bytesPerSector / numRowInEncMatix; - -#if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU / sizeof(short); - short *destShortBuf, *srcShortBuf1, *srcShortBuf2; - short temp1; -#elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU / sizeof(long); - long *destLongBuf, *srcLongBuf1, *srcLongBuf2; - long temp1; -#endif - -#if RF_EO_MATRIX_DIM > 17 - RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1); - RF_ASSERT(bytesPerEU % sizeof(short) == 0); -#elif RF_EO_MATRIX_DIM == 17 - RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4); - RF_ASSERT(bytesPerEU % sizeof(long) == 0); -#endif - - S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), 
RF_EO_MATRIX_DIM); -#if RF_EO_MATRIX_DIM > 17 - srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU); -#elif RF_EO_MATRIX_DIM == 17 - srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU); -#endif - - for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) { - indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); - -#if RF_EO_MATRIX_DIM > 17 - destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU); - srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU); - for (j = 0; j < shortsPerEU; j++) { - temp1 = destShortBuf[j] ^ srcShortBuf1[j]; - /* note: S_index won't be at the end row for any src - * col! */ - if (indexInSrc != RF_EO_MATRIX_DIM - 1) - destShortBuf[j] = (srcShortBuf2[j]) ^ temp1; - /* if indexInSrc is at the end row, ie. - * RF_EO_MATRIX_DIM -1, then all elements are zero! */ - else - destShortBuf[j] = temp1; - } - -#elif RF_EO_MATRIX_DIM == 17 - destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU); - srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU); - for (j = 0; j < longsPerEU; j++) { - temp1 = destLongBuf[j] ^ srcLongBuf1[j]; - if (indexInSrc != RF_EO_MATRIX_DIM - 1) - destLongBuf[j] = (srcLongBuf2[j]) ^ temp1; - else - destLongBuf[j] = temp1; - } -#endif - } -} - -void -rf_e_encToBuf( - RF_Raid_t * raidPtr, - RF_RowCol_t srcLogicCol, - char *srcbuf, - RF_RowCol_t destLogicCol, - char *destbuf, - int numSector) -{ - int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - - for (i = 0; i < numSector; i++) { - rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector); - srcbuf += bytesPerSector; - destbuf += bytesPerSector; - } -} -/************************************************************************************** - * when parity die and one data die, We use second redundant information, 'E', - * to recover the data in dead disk. 
This function is used in the recovery node of - * for EO_110_CreateReadDAG - **************************************************************************************/ -int -rf_RecoveryEFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - RF_RowCol_t scol, /* source logical column */ - fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of - * failed SU */ - int i; - RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) - if (node->params[i + 1].p != node->results[0]) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - if (i == node->numParams - 4) - scol = RF_EO_MATRIX_DIM - 2; /* the colume of - * redundant E */ - else - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = (char *) node->params[i + 1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */ -} -/************************************************************************************** - * This function is used in the case where one data and the parity have filed. 
- * (in EO_110_CreateWriteDAG ) - **************************************************************************************/ -int -rf_EO_DegradedWriteEFunc(RF_DagNode_t * node) -{ - rf_DegrESubroutine(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX Yet another one!! GO */ -#endif -} - - - -/************************************************************************************** - * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES - **************************************************************************************/ - -void -rf_doubleEOdecode( - RF_Raid_t * raidPtr, - char **rrdbuf, - char **dest, - RF_RowCol_t * fcol, - char *pbuf, - char *ebuf) -{ - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i, j, k, f1, f2, row; - int rrdrow, erow, count = 0; - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; -#if 0 - int pcol = (RF_EO_MATRIX_DIM) - 1; -#endif - int ecol = (RF_EO_MATRIX_DIM) - 2; - int bytesPerEU = bytesPerSector / numRowInEncMatix; - int numDataCol = layoutPtr->numDataCol; -#if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU / sizeof(short); - short *rrdbuf_current, *pbuf_current, *ebuf_current; - short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; - short *temp; - short *P; - - RF_ASSERT(bytesPerEU % sizeof(short) == 0); - RF_Malloc(P, bytesPerEU, (short *)); - RF_Malloc(temp, bytesPerEU, (short *)); -#elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU / sizeof(long); - long *rrdbuf_current, *pbuf_current, *ebuf_current; - long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; - long *temp; - long *P; - - RF_ASSERT(bytesPerEU % sizeof(long) == 0); - RF_Malloc(P, bytesPerEU, (long *)); - RF_Malloc(temp, bytesPerEU, (long *)); -#endif - RF_ASSERT(*((long *) dest[0]) == 0); - RF_ASSERT(*((long *) dest[1]) == 0); - bzero((char *) P, bytesPerEU); - bzero((char *) temp, 
bytesPerEU); - RF_ASSERT(*P == 0); - /* calculate the 'P' parameter, which, not parity, is the Xor of all - * elements in the last two column, ie. 'E' and 'parity' colume, see - * the Ref. paper by Blaum, et al 1993 */ - for (i = 0; i < numRowInEncMatix; i++) - for (k = 0; k < longsPerEU; k++) { -#if RF_EO_MATRIX_DIM > 17 - ebuf_current = ((short *) ebuf) + i * shortsPerEU + k; - pbuf_current = ((short *) pbuf) + i * shortsPerEU + k; -#elif RF_EO_MATRIX_DIM == 17 - ebuf_current = ((long *) ebuf) + i * longsPerEU + k; - pbuf_current = ((long *) pbuf) + i * longsPerEU + k; -#endif - P[k] ^= *ebuf_current; - P[k] ^= *pbuf_current; - } - RF_ASSERT(fcol[0] != fcol[1]); - if (fcol[0] < fcol[1]) { -#if RF_EO_MATRIX_DIM > 17 - dest_smaller = (short *) (dest[0]); - dest_larger = (short *) (dest[1]); -#elif RF_EO_MATRIX_DIM == 17 - dest_smaller = (long *) (dest[0]); - dest_larger = (long *) (dest[1]); -#endif - f1 = fcol[0]; - f2 = fcol[1]; - } else { -#if RF_EO_MATRIX_DIM > 17 - dest_smaller = (short *) (dest[1]); - dest_larger = (short *) (dest[0]); -#elif RF_EO_MATRIX_DIM == 17 - dest_smaller = (long *) (dest[1]); - dest_larger = (long *) (dest[0]); -#endif - f1 = fcol[1]; - f2 = fcol[0]; - } - row = (RF_EO_MATRIX_DIM) - 1; - while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) { -#if RF_EO_MATRIX_DIM > 17 - dest_larger_current = dest_larger + row * shortsPerEU; - dest_smaller_current = dest_smaller + row * shortsPerEU; -#elif RF_EO_MATRIX_DIM == 17 - dest_larger_current = dest_larger + row * longsPerEU; - dest_smaller_current = dest_smaller + row * longsPerEU; -#endif - /** Do the diagonal recovery. Initially, temp[k] = (failed 1), - which is the failed data in the colume which has smaller col index. 
**/ - /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ - for (j = 0; j < numDataCol; j++) { - if (j == f1 || j == f2) - continue; - rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM); - if (rrdrow != (RF_EO_MATRIX_DIM) - 1) { -#if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#endif - } - } - /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't - * Xor into it E(erow,m-2) = (principle diagonal) ^ (failed - * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal - * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle - * diagonal) ^ (failed 2) */ - - erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM)); - if (erow != (RF_EO_MATRIX_DIM) - 1) { -#if RF_EO_MATRIX_DIM > 17 - ebuf_current = (short *) ebuf + shortsPerEU * erow; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(ebuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - ebuf_current = (long *) ebuf + longsPerEU * erow; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(ebuf_current + k); -#endif - } - /* step 3: ^P to obtain the failed data (failed 2). P can be - * proved to be actually (principle diagonal) After this - * step, temp[k] = (failed 2), the failed data to be recovered */ -#if RF_EO_MATRIX_DIM > 17 - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= P[k]; - /* Put the data to the destination buffer */ - for (k = 0; k < shortsPerEU; k++) - dest_larger_current[k] = temp[k]; -#elif RF_EO_MATRIX_DIM == 17 - for (k = 0; k < longsPerEU; k++) - temp[k] ^= P[k]; - /* Put the data to the destination buffer */ - for (k = 0; k < longsPerEU; k++) - dest_larger_current[k] = temp[k]; -#endif - - /** THE FOLLOWING DO THE HORIZONTAL XOR **/ - /* step 1: ^(SUM of A(row,0..m-3)), ie. 
all nonfailed data - * columes */ - for (j = 0; j < numDataCol; j++) { - if (j == f1 || j == f2) - continue; -#if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#endif - } - /* step 2: ^A(row,m-1) */ - /* step 3: Put the data to the destination buffer */ -#if RF_EO_MATRIX_DIM > 17 - pbuf_current = (short *) pbuf + shortsPerEU * row; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(pbuf_current + k); - for (k = 0; k < shortsPerEU; k++) - dest_smaller_current[k] = temp[k]; -#elif RF_EO_MATRIX_DIM == 17 - pbuf_current = (long *) pbuf + longsPerEU * row; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(pbuf_current + k); - for (k = 0; k < longsPerEU; k++) - dest_smaller_current[k] = temp[k]; -#endif - count++; - } - /* Check if all Encoding Unit in the data buffer have been decoded, - * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number, - * this algorithm will covered all buffer */ - RF_ASSERT(count == numRowInEncMatix); - RF_Free((char *) P, bytesPerEU); - RF_Free((char *) temp, bytesPerEU); -} - - -/*************************************************************************************** -* This function is called by double degragded read -* EO_200_CreateReadDAG -* -***************************************************************************************/ -int -rf_EvenOddDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - int ndataParam = 0; - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i, prm, sector, nresults = node->numResults; - RF_SectorCount_t secPerSU = 
layoutPtr->sectorsPerStripeUnit; - unsigned sosAddr; - int two = 0, mallc_one = 0, mallc_two = 0; /* flags to indicate if - * memory is allocated */ - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1, - npda; - RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol; - char **buf, *ebuf, *pbuf, *dest[2]; - long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff; - RF_SectorNum_t startSector, endSector; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ETIMER_START(timer); - - /* Find out the number of parameters which are pdas for data - * information */ - for (i = 0; i <= np; i++) - if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) { - ndataParam = i; - break; - } - RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); - if (ndataParam != 0) { - RF_Malloc(suoff, ndataParam * sizeof(long), (long *)); - RF_Malloc(suend, ndataParam * sizeof(long), (long *)); - RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *)); - } - if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { - RF_ASSERT(0); /* currently, no support for this situation */ - ppda = node->params[np - 6].p; - ppda2 = node->params[np - 5].p; - RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY); - epda = node->params[np - 4].p; - epda2 = node->params[np - 3].p; - RF_ASSERT(epda2->type == RF_PDA_TYPE_Q); - two = 1; - } else { - ppda = node->params[np - 4].p; - epda = node->params[np - 3].p; - psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector); - esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector); - RF_ASSERT(psuoff == esuoff); - } - /* - the followings have three goals: - 1. determine the startSector to begin decoding and endSector to end decoding. - 2. determine the colume numbers of the two failed disks. - 3. 
determine the offset and end offset of the access within each failed stripe unit. - */ - if (nresults == 1) { - /* find the startSector to begin decoding */ - pda = node->results[0]; - bzero(pda->bufPtr, bytesPerSector * pda->numSector); - fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector); - fsuend[0] = fsuoff[0] + pda->numSector; - startSector = fsuoff[0]; - endSector = fsuend[0]; - - /* find out the column of failed disk being accessed */ - fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress); - - /* find out the other failed colume not accessed */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != fcol[0]) - break; - } - RF_ASSERT(i < numDataCol); - fcol[1] = i; - } else { - RF_ASSERT(nresults == 2); - pda0 = node->results[0]; - bzero(pda0->bufPtr, bytesPerSector * pda0->numSector); - pda1 = node->results[1]; - bzero(pda1->bufPtr, bytesPerSector * pda1->numSector); - /* determine the failed colume numbers of the two failed - * disks. */ - fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress); - fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress); - /* determine the offset and end offset of the access within - * each failed stripe unit. 
*/ - fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector); - fsuend[0] = fsuoff[0] + pda0->numSector; - fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector); - fsuend[1] = fsuoff[1] + pda1->numSector; - /* determine the startSector to begin decoding */ - startSector = RF_MIN(pda0->startSector, pda1->startSector); - /* determine the endSector to end decoding */ - endSector = RF_MAX(fsuend[0], fsuend[1]); - } - /* - assign the beginning sector and the end sector for each parameter - find out the corresponding colume # for each parameter - */ - for (prm = 0; prm < ndataParam; prm++) { - pda = node->params[prm].p; - suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector); - suend[prm] = suoff[prm] + pda->numSector; - prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress); - } - /* 'sector' is the sector for the current decoding algorithm. For each - * sector in the failed SU, find out the corresponding parameters that - * cover the current sector and that are needed for decoding of this - * sector in failed SU. 2. Find out if sector is in the shadow of any - * accessed failed SU. If not, malloc a temporary space of a sector in - * size. */ - for (sector = startSector; sector < endSector; sector++) { - if (nresults == 2) - if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1])) - continue; - for (prm = 0; prm < ndataParam; prm++) - if (suoff[prm] <= sector && sector < suend[prm]) - buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr + - rf_RaidAddressToByte(raidPtr, sector - suoff[prm]); - /* find out if sector is in the shadow of any accessed failed - * SU. If yes, assign dest[0], dest[1] to point at suitable - * position of the buffer corresponding to failed SUs. if no, - * malloc a temporary space of a sector in size for - * destination of decoding. 
*/ - RF_ASSERT(nresults == 1 || nresults == 2); - if (nresults == 1) { - dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); - /* Always malloc temp buffer to dest[1] */ - RF_Malloc(dest[1], bytesPerSector, (char *)); - bzero(dest[1], bytesPerSector); - mallc_two = 1; - } else { - if (fsuoff[0] <= sector && sector < fsuend[0]) - dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); - else { - RF_Malloc(dest[0], bytesPerSector, (char *)); - bzero(dest[0], bytesPerSector); - mallc_one = 1; - } - if (fsuoff[1] <= sector && sector < fsuend[1]) - dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]); - else { - RF_Malloc(dest[1], bytesPerSector, (char *)); - bzero(dest[1], bytesPerSector); - mallc_two = 1; - } - RF_ASSERT(mallc_one == 0 || mallc_two == 0); - } - pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff); - ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff); - /* - * After finish finding all needed sectors, call doubleEOdecode function for decoding - * one sector to destination. - */ - rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); - /* free all allocated memory, and mark flag to indicate no - * memory is being allocated */ - if (mallc_one == 1) - RF_Free(dest[0], bytesPerSector); - if (mallc_two == 1) - RF_Free(dest[1], bytesPerSector); - mallc_one = mallc_two = 0; - } - RF_Free(buf, numDataCol * sizeof(char *)); - if (ndataParam != 0) { - RF_Free(suoff, ndataParam * sizeof(long)); - RF_Free(suend, ndataParam * sizeof(long)); - RF_Free(prmToCol, ndataParam * sizeof(long)); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) { - tracerec->q_us += RF_ETIMER_VAL_US(timer); - } - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX is this even close!!?!?!!? 
GO */ -#endif -} - - -/* currently, only access of one of the two failed SU is allowed in this function. - * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into - * many accesses of single stripe unit. - */ - -int -rf_EOWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - RF_SectorNum_t sector; - RF_RowCol_t col, scol; - int prm, i, j; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - unsigned sosAddr; - unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - RF_int64 numbytes; - RF_SectorNum_t startSector, endSector; - RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda; - RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol; - char **buf; /* buf[0], buf[1], buf[2], ...etc. point to - * buffer storing data read from col0, col1, - * col2 */ - char *ebuf, *pbuf, *dest[2], *olddata[2]; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this - * case, the other failed SU - * is not being accessed */ - RF_ETIMER_START(timer); - RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); - - ppda = node->results[0];/* Instead of being buffers, node->results[0] - * and [1] are Ppda and Epda */ - epda = node->results[1]; - fpda = asmap->failedPDAs[0]; - - /* First, recovery the failed old SU using EvenOdd double decoding */ - /* determine the startSector and endSector for decoding */ - startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector); - endSector = startSector + fpda->numSector; - /* Assign buf[col] pointers to point to each non-failed colume and - * initialize the pbuf and ebuf to point at the beginning of each - * source buffers and destination buffers */ 
- for (prm = 0; prm < numDataCol - 2; prm++) { - pda = (RF_PhysDiskAddr_t *) node->params[prm].p; - col = rf_EUCol(layoutPtr, pda->raidAddress); - buf[col] = pda->bufPtr; - } - /* pbuf and ebuf: they will change values as double recovery decoding - * goes on */ - pbuf = ppda->bufPtr; - ebuf = epda->bufPtr; - /* find out the logical colume numbers in the encoding matrix of the - * two failed columes */ - fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress); - - /* find out the other failed colume not accessed this time */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != fcol[0]) - break; - } - RF_ASSERT(i < numDataCol); - fcol[1] = i; - /* assign temporary space to put recovered failed SU */ - numbytes = fpda->numSector * bytesPerSector; - RF_Malloc(olddata[0], numbytes, (char *)); - RF_Malloc(olddata[1], numbytes, (char *)); - dest[0] = olddata[0]; - dest[1] = olddata[1]; - bzero(olddata[0], numbytes); - bzero(olddata[1], numbytes); - /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] - * have already pointed at the beginning of each source buffers and - * destination buffers */ - for (sector = startSector, i = 0; sector < endSector; sector++, i++) { - rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); - for (j = 0; j < numDataCol; j++) - if ((j != fcol[0]) && (j != fcol[1])) - buf[j] += bytesPerSector; - dest[0] += bytesPerSector; - dest[1] += bytesPerSector; - ebuf += bytesPerSector; - pbuf += bytesPerSector; - } - /* after recovery, the buffer pointed by olddata[0] is the old failed - * data. 
With new writing data and this old data, use small write to - * calculate the new redundant informations */ - /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of - * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol - * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[ - * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol - * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of - * wudNodes; For current implementation, we assume the simplest case: - * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 - * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new - * data to be writen to the failed disk. We first bxor the new data - * into the old recovered data, then do the same things as small - * write. */ - - rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp); - /* do new 'E' calculation */ - /* find out the corresponding colume in encoding matrix for write - * colume to be encoded into redundant disk 'E' */ - scol = rf_EUCol(layoutPtr, fpda->raidAddress); - /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest - * buffer pointer */ - rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector); - - /* do new 'P' calculation */ - rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp); - /* Free the allocated buffer */ - RF_Free(olddata[0], numbytes); - RF_Free(olddata[1], numbytes); - RF_Free(buf, numDataCol * sizeof(char *)); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) { - tracerec->q_us += RF_ETIMER_VAL_US(timer); - } - rf_GenericWakeupFunc(node, 0); - return (0); -} -#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.h b/sys/dev/raidframe/rf_evenodd_dagfuncs.h deleted file mode 100644 index cf5028b..0000000 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.h +++ /dev/null @@ -1,79 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: 
rf_evenodd_dagfuncs.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ -/* - * rf_evenodd_dagfuncs.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#ifndef _RF__RF_EVENODD_DAGFUNCS_H_ -#define _RF__RF_EVENODD_DAGFUNCS_H_ - -extern RF_RedFuncs_t rf_EOSmallWriteEFuncs; -extern RF_RedFuncs_t rf_EOSmallWritePFuncs; -extern RF_RedFuncs_t rf_eoERecoveryFuncs; -extern RF_RedFuncs_t rf_eoPRecoveryFuncs; - -int rf_RegularPEFunc(RF_DagNode_t * node); -int rf_RegularONEFunc(RF_DagNode_t * node); -int rf_SimpleONEFunc(RF_DagNode_t * node); -void rf_RegularESubroutine(RF_DagNode_t * node, char *ebuf); -int rf_RegularEFunc(RF_DagNode_t * node); -void rf_DegrESubroutine(RF_DagNode_t * node, char *ebuf); -int rf_Degraded_100_EOFunc(RF_DagNode_t * node); -void -rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf, - RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector); -void -rf_e_encToBuf(RF_Raid_t * raidPtr, RF_RowCol_t srcLogicCol, - char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector); -int rf_RecoveryEFunc(RF_DagNode_t * node); -int rf_EO_DegradedWriteEFunc(RF_DagNode_t * node); -void -rf_doubleEOdecode(RF_Raid_t * raidPtr, char **rrdbuf, char **dest, - RF_RowCol_t * fcol, char *pbuf, char *ebuf); -int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t * node); -int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t * node); - -#define rf_EUCol(_layoutPtr_, _addr_ ) \ -( (_addr_)%( (_layoutPtr_)->dataSectorsPerStripe ) )/((_layoutPtr_)->sectorsPerStripeUnit) - -#define rf_EO_Mod( _int1_, _int2_ ) \ -( ((_int1_) < 0)? (((_int1_)+(_int2_))%(_int2_)) : (_int1_)%(_int2_) ) - -#define rf_OffsetOfNextEUBoundary(_offset_, sec_per_eu) ((_offset_)/(sec_per_eu) + 1)*(sec_per_eu) - -#define RF_EO_MATRIX_DIM 17 - -/* - * RF_EO_MATRIX_DIM should be a prime number: and "bytesPerSector" should be - * dividable by ( RF_EO_MATRIX_DIM - 1) to fully encode and utilize the space - * in a sector, this number could also be 17. Tha later case doesn't apply - * for disk array larger than 17 columns totally. 
- */ - -#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.c b/sys/dev/raidframe/rf_evenodd_dags.c deleted file mode 100644 index cef32c2..0000000 --- a/sys/dev/raidframe/rf_evenodd_dags.c +++ /dev/null @@ -1,191 +0,0 @@ -/* $NetBSD: rf_evenodd_dags.c,v 1.2 1999/02/05 00:06:11 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_evenodd_dags.c - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_EVENODD > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_evenodd_dags.h> -#include <dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_evenodd_dagfuncs.h> -#include <dev/raidframe/rf_pq.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagffwr.h> - - -/* - * Lost one data. - * Use P to reconstruct missing data. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); -} -/* - * Lost data + E. - * Use P to reconstruct missing data. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); -} -/* - * Lost data + P. - * Make E look like P, and use Eor for Xor, and we can - * use degraded read DAG. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoERecoveryFuncs); -} -/* - * Lost two data. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG) -{ - rf_EO_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); -} -/* - * Lost two data. 
- */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG) -{ - rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, (int (*) (RF_DagNode_t *)) rf_Degraded_100_EOFunc, RF_TRUE); -} -/* - * E is dead. Small write. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWritePFuncs, NULL); -} -/* - * E is dead. Large write. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_TRUE); -} -/* - * P is dead. Small write. - * Swap E + P, use single-degraded stuff. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWriteEFuncs, NULL); -} -/* - * P is dead. Large write. - * Swap E + P, use single-degraded stuff. 
- */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularEFunc, RF_FALSE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG) -{ - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { - RF_PANIC(); - } - /* swap P and E to fake out parity code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, (int (*) (RF_DagNode_t *)) rf_EO_DegradedWriteEFunc, RF_FALSE); - /* is the regular E func the right one to call? 
*/ -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead) -{ - rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, - "Re", "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc); -} -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs); -} -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, rf_RegularPEFunc, RF_FALSE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG) -{ - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc); -} -#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.h b/sys/dev/raidframe/rf_evenodd_dags.h deleted file mode 100644 index c4218a4..0000000 --- a/sys/dev/raidframe/rf_evenodd_dags.h +++ /dev/null @@ -1,64 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_evenodd_dags.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ -/* - * rf_evenodd_dags.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_EVENODD_DAGS_H_ -#define _RF__RF_EVENODD_DAGS_H_ - -#include <dev/raidframe/rf_types.h> - -#if RF_UTILITY == 0 -#include <dev/raidframe/rf_dag.h> - -/* extern decl's of the failure mode EO functions. - * swiped from rf_pqdeg.h - */ - -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead); -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ - -#endif /* !_RF__RF_EVENODD_DAGS_H_ */ diff --git a/sys/dev/raidframe/rf_fifo.c b/sys/dev/raidframe/rf_fifo.c deleted file mode 100644 index d5ce0d0..0000000 --- a/sys/dev/raidframe/rf_fifo.c +++ /dev/null @@ -1,238 +0,0 @@ -/* $NetBSD: rf_fifo.c,v 1.5 2000/03/04 03:27:13 oster Exp $ */ - -#include <sys/cdefs.h> 
-__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************************** - * - * rf_fifo.c -- prioritized fifo queue code. - * There are only two priority levels: hi and lo. 
- * - * Aug 4, 1994, adapted from raidSim version (MCH) - * - ***************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_fifo.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_types.h> - -/* just malloc a header, zero it (via calloc), and return it */ -/*ARGSUSED*/ -void * -rf_FifoCreate(sectPerDisk, clList, listp) - RF_SectorCount_t sectPerDisk; - RF_AllocListElem_t *clList; - RF_ShutdownList_t **listp; -{ - RF_FifoHeader_t *q; - - RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList); - q->hq_count = q->lq_count = 0; - return ((void *) q); -} - -void -rf_FifoEnqueue(q_in, elem, priority) - void *q_in; - RF_DiskQueueData_t *elem; - int priority; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - - RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY); - - elem->next = NULL; - if (priority == RF_IO_NORMAL_PRIORITY) { - if (!q->hq_tail) { - RF_ASSERT(q->hq_count == 0 && q->hq_head == NULL); - q->hq_head = q->hq_tail = elem; - } else { - RF_ASSERT(q->hq_count != 0 && q->hq_head != NULL); - q->hq_tail->next = elem; - q->hq_tail = elem; - } - q->hq_count++; - } else { - RF_ASSERT(elem->next == NULL); - if (rf_fifoDebug) { - printf("raid%d: fifo: ENQ lopri\n", - elem->raidPtr->raidid); - } - if (!q->lq_tail) { - RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL); - q->lq_head = q->lq_tail = elem; - } else { - RF_ASSERT(q->lq_count != 0 && q->lq_head != NULL); - q->lq_tail->next = elem; - q->lq_tail = elem; - } - q->lq_count++; - } - if ((q->hq_count + q->lq_count) != elem->queue->queueLength) { - printf("Queue lengths differ!: %d %d %d\n", - q->hq_count, q->lq_count, 
(int) elem->queue->queueLength); - printf("%d %d %d %d\n", - (int) elem->queue->numOutstanding, - (int) elem->queue->maxOutstanding, - (int) elem->queue->row, - (int) elem->queue->col); - } - RF_ASSERT((q->hq_count + q->lq_count) == elem->queue->queueLength); -} - -RF_DiskQueueData_t * -rf_FifoDequeue(q_in) - void *q_in; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *nd; - - RF_ASSERT(q); - if (q->hq_head) { - RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL); - nd = q->hq_head; - q->hq_head = q->hq_head->next; - if (!q->hq_head) - q->hq_tail = NULL; - nd->next = NULL; - q->hq_count--; - } else - if (q->lq_head) { - RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL); - nd = q->lq_head; - q->lq_head = q->lq_head->next; - if (!q->lq_head) - q->lq_tail = NULL; - nd->next = NULL; - q->lq_count--; - if (rf_fifoDebug) { - printf("raid%d: fifo: DEQ lopri %lx\n", - nd->raidPtr->raidid, (long) nd); - } - } else { - RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL); - nd = NULL; - } - return (nd); -} - -/* Return ptr to item at head of queue. Used to examine request - * info without actually dequeueing the request. - */ -RF_DiskQueueData_t * -rf_FifoPeek(void *q_in) -{ - RF_DiskQueueData_t *headElement = NULL; - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - - RF_ASSERT(q); - if (q->hq_head) - headElement = q->hq_head; - else - if (q->lq_head) - headElement = q->lq_head; - return (headElement); -} -/* We sometimes need to promote a low priority access to a regular priority access. - * Currently, this is only used when the user wants to write a stripe which is currently - * under reconstruction. - * This routine will promote all accesses tagged with the indicated parityStripeID from - * the low priority queue to the end of the normal priority queue. - * We assume the queue is locked upon entry. 
- */ -int -rf_FifoPromote(q_in, parityStripeID, which_ru) - void *q_in; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; /* lp = lo-pri queue - * pointer, pt = trailer */ - int retval = 0; - - while (lp) { - - /* search for the indicated parity stripe in the low-pri queue */ - if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) { - /* printf("FifoPromote: promoting access for psid - * %ld\n",parityStripeID); */ - if (pt) - pt->next = lp->next; /* delete an entry other - * than the first */ - else - q->lq_head = lp->next; /* delete the head entry */ - - if (!q->lq_head) - q->lq_tail = NULL; /* we deleted the only - * entry */ - else - if (lp == q->lq_tail) - q->lq_tail = pt; /* we deleted the tail - * entry */ - - lp->next = NULL; - q->lq_count--; - - if (q->hq_tail) { - q->hq_tail->next = lp; - q->hq_tail = lp; - } - /* append to hi-priority queue */ - else { - q->hq_head = q->hq_tail = lp; - } - q->hq_count++; - - /* UpdateShortestSeekFinishTimeForced(lp->requestPtr, - * lp->diskState); *//* deal with this later, if ever */ - - lp = (pt) ? pt->next : q->lq_head; /* reset low-pri pointer - * and continue */ - retval++; - - } else { - pt = lp; - lp = lp->next; - } - } - - /* sanity check. delete this if you ever put more than one entry in - * the low-pri queue */ - RF_ASSERT(retval == 0 || retval == 1); - return (retval); -} diff --git a/sys/dev/raidframe/rf_fifo.h b/sys/dev/raidframe/rf_fifo.h deleted file mode 100644 index 9392f08..0000000 --- a/sys/dev/raidframe/rf_fifo.h +++ /dev/null @@ -1,62 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_fifo.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_fifo.h -- prioritized FIFO queue code. 
- * - * 4-9-93 Created (MCH) - */ - - -#ifndef _RF__RF_FIFO_H_ -#define _RF__RF_FIFO_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_diskqueue.h> - -typedef struct RF_FifoHeader_s { - RF_DiskQueueData_t *hq_head, *hq_tail; /* high priority requests */ - RF_DiskQueueData_t *lq_head, *lq_tail; /* low priority requests */ - int hq_count, lq_count; /* debug only */ -} RF_FifoHeader_t; - -extern void * -rf_FifoCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t * clList, RF_ShutdownList_t ** listp); -extern void -rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t * elem, - int priority); -extern RF_DiskQueueData_t *rf_FifoDequeue(void *q_in); -extern RF_DiskQueueData_t *rf_FifoPeek(void *q_in); -extern int -rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -#endif /* !_RF__RF_FIFO_H_ */ diff --git a/sys/dev/raidframe/rf_freebsdkintf.c b/sys/dev/raidframe/rf_freebsdkintf.c deleted file mode 100644 index 13f5abb..0000000 --- a/sys/dev/raidframe/rf_freebsdkintf.c +++ /dev/null @@ -1,3192 +0,0 @@ -/*- - * Copyright (c) 2002 Scott Long <scottl@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* $NetBSD: rf_netbsdkintf.c,v 1.105 2001/04/05 02:48:51 oster Exp $ */ -/*- - * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster; Jason R. Thorpe. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1988 University of Utah. - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: Utah $Hdr: cd.c 1.6 90/11/28$ - * - * @(#)cd.c 8.2 (Berkeley) 11/16/93 - */ - - - - -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*********************************************************** - * - * rf_kintf.c -- the kernel interface routines for RAIDframe - * - ***********************************************************/ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/param.h> -#include <sys/queue.h> -#include <sys/stat.h> -#include <sys/ioccom.h> -#include <sys/filio.h> -#include <sys/filedesc.h> -#include <sys/fcntl.h> -#include <sys/systm.h> -#include <sys/namei.h> -#include <sys/vnode.h> -#include <sys/bio.h> -#include <sys/buf.h> -#include <sys/conf.h> -#include <sys/disk.h> -#include <sys/lock.h> -#include <sys/reboot.h> -#include <sys/module.h> -#include <vm/uma.h> -#include <geom/geom_disk.h> - -#include "opt_raid.h" -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raidframe.h> -#include <dev/raidframe/rf_copyback.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagflags.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_kintf.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_configure.h> - -RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) - -static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a - * spare table */ -static RF_SparetWait_t 
*rf_sparet_resp_queue; /* responses from - * installation process */ - -/* prototypes */ -static void KernelWakeupFunc(struct bio *); -static void InitBP(struct bio *, struct vnode *, unsigned rw_flag, - dev_t dev, RF_SectorNum_t startSect, - RF_SectorCount_t numSect, caddr_t buf, - void (*cbFunc) (struct bio *), void *cbArg, - int logBytesPerSector, struct proc * b_proc); -static struct raid_softc *raidinit(RF_Raid_t *); -static void rf_search_label(dev_t, struct disklabel *, - RF_AutoConfig_t **) __unused; - -static int raid_modevent(module_t, int, void*); -void raidattach(void); - -disk_open_t raidopen; -disk_close_t raidclose; -disk_ioctl_t raidioctl; -disk_strategy_t raidstrategy; - -d_open_t raidctlopen; -d_close_t raidctlclose; -d_ioctl_t raidctlioctl; - -static struct cdevsw raidctl_cdevsw = { - .d_version = D_VERSION, - .d_flags = D_NEEDGIANT, - .d_open = raidctlopen, - .d_close = raidctlclose, - .d_ioctl = raidctlioctl, - .d_name = "raidctl", -}; - -/* - * Pilfered from ccd.c - */ - -struct raidbuf { - struct bio rf_buf; /* new I/O buf. MUST BE FIRST!!! */ - struct bio *rf_obp; /* ptr. to original I/O buf */ - int rf_flags; /* misc. flags */ - RF_DiskQueueData_t *req;/* the request that this was part of.. */ -}; - - -#define RAIDGETBUF(sc) uma_zalloc((sc)->sc_cbufpool, M_NOWAIT) -#define RAIDPUTBUF(sc, cbp) uma_zfree((sc)->sc_cbufpool, cbp) - -#define RF_MAX_ARRAYS 32 - -/* Raid control device */ -struct raidctl_softc { - dev_t sc_dev; /* Device node */ - int sc_flags; /* flags */ - int sc_numraid; /* Number of configured raid devices */ - struct raid_softc *sc_raiddevs[RF_MAX_ARRAYS]; -}; - -struct raid_softc { - dev_t sc_parent_dev; - int sc_flags; /* flags */ - int sc_busycount; /* How many times are we opened? 
*/ - size_t sc_size; /* size of the raid device */ - dev_t sc_parent; /* Parent device */ - struct disk *sc_disk; /* generic disk device info */ - uma_zone_t sc_cbufpool; /* component buffer pool */ - RF_Raid_t *raidPtr; /* Raid information struct */ - struct bio_queue_head bio_queue; /* used for the device queue */ -}; -/* sc_flags */ -#define RAIDF_OPEN 0x01 /* unit has been initialized */ -#define RAIDF_WLABEL 0x02 /* label area is writable */ -#define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ -#define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ -#define RAIDF_LOCKED 0x80 /* unit is locked */ - -/* - * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. - * Be aware that large numbers can allow the driver to consume a lot of - * kernel memory, especially on writes, and in degraded mode reads. - * - * For example: with a stripe width of 64 blocks (32k) and 5 disks, - * a single 64K write will typically require 64K for the old data, - * 64K for the old parity, and 64K for the new parity, for a total - * of 192K (if the parity buffer is not re-used immediately). - * Even it if is used immedately, that's still 128K, which when multiplied - * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. - * - * Now in degraded mode, for example, a 64K read on the above setup may - * require data reconstruction, which will require *all* of the 4 remaining - * disks to participate -- 4 * 32K/disk == 128K again. 
- */ - -#ifndef RAIDOUTSTANDING -#define RAIDOUTSTANDING 10 -#endif - -static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disk*); -static int raidlock(struct raid_softc *); -static void raidunlock(struct raid_softc *); - -static void rf_markalldirty(RF_Raid_t *); - -static dev_t raidctl_dev; - -void rf_ReconThread(struct rf_recon_req *); -/* XXX what I want is: */ -/*void rf_ReconThread(RF_Raid_t *raidPtr); */ -void rf_RewriteParityThread(RF_Raid_t *raidPtr); -void rf_CopybackThread(RF_Raid_t *raidPtr); -void rf_ReconstructInPlaceThread(struct rf_recon_req *); -void rf_buildroothack(void *, struct raidctl_softc *); - -RF_AutoConfig_t *rf_find_raid_components(void); -RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); -static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); -static int rf_reasonable_label(RF_ComponentLabel_t *); -void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); -int rf_set_autoconfig(RF_Raid_t *, int); -int rf_set_rootpartition(RF_Raid_t *, int); -void rf_release_all_vps(RF_ConfigSet_t *); -void rf_cleanup_config_set(RF_ConfigSet_t *); -int rf_have_enough_components(RF_ConfigSet_t *); -int rf_auto_config_set(RF_ConfigSet_t *, int *, struct raidctl_softc *); -static int raidgetunit(struct raidctl_softc *, int); -static int raidshutdown(void); - -void -raidattach(void) -{ - struct raidctl_softc *parent_sc = NULL; - RF_AutoConfig_t *ac_list; /* autoconfig list */ - RF_ConfigSet_t *config_sets; - int autoconfig = 0; - - /* This is where all the initialization stuff gets done. 
*/ - - if(rf_mutex_init(&rf_sparet_wait_mutex, __FUNCTION__)) { - rf_printf(0, "RAIDframe: failed to initialize mutexes\n"); - return; - } - - rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; - - if (rf_BootRaidframe() != 0) { - rf_printf(0, "Serious error booting RAIDframe!!\n"); - return; - } - - rf_printf(0, "Kernelized RAIDframe activated\n"); - MALLOC(parent_sc, struct raidctl_softc *, sizeof(*parent_sc), - M_RAIDFRAME, M_NOWAIT|M_ZERO); - if (parent_sc == NULL) { - RF_PANIC(); - return; - } - - parent_sc->sc_dev= make_dev(&raidctl_cdevsw, 0, UID_ROOT, GID_WHEEL, - 0600, "raidctl"); - parent_sc->sc_dev->si_drv1 = parent_sc; - raidctl_dev = parent_sc->sc_dev; - -#if RAID_AUTOCONFIG - autoconfig = 1; -#endif - - if (autoconfig) { - /* 1. locate all RAID components on the system */ - - rf_printf(0, "Searching for raid components...\n"); - ac_list = rf_find_raid_components(); - if (ac_list == NULL) - return; - - /* 2. sort them into their respective sets */ - - config_sets = rf_create_auto_sets(ac_list); - - /* 3. 
evaluate each set and configure the valid ones - This gets done in rf_buildroothack() */ - - /* schedule the creation of the thread to do the - "/ on RAID" stuff */ - - rf_buildroothack(config_sets, parent_sc); -#if 0 - kthread_create(rf_buildroothack,config_sets); - -#endif /* RAID_AUTOCONFIG */ - } -} - -void -rf_buildroothack(arg, parent_sc) - void *arg; - struct raidctl_softc *parent_sc; -{ - RF_ConfigSet_t *config_sets = arg; - RF_ConfigSet_t *cset; - RF_ConfigSet_t *next_cset; - int retcode; - int raidID; - int rootID; - int num_root; - - rootID = 0; - num_root = 0; - cset = config_sets; - while(cset != NULL ) { - next_cset = cset->next; - if (rf_have_enough_components(cset) && - cset->ac->clabel->autoconfigure==1) { - retcode = rf_auto_config_set(cset, &raidID, parent_sc); - if (!retcode) { - if (cset->rootable) { - rootID = raidID; - num_root++; - } - } else { - /* The autoconfig didn't work :( */ - rf_printf(1, "Autoconfig failed with code %d" - "for raid%d\n", retcode, raidID); - rf_release_all_vps(cset); - } - } else { - /* we're not autoconfiguring this set... - release the associated resources */ - rf_release_all_vps(cset); - } - /* cleanup */ - rf_cleanup_config_set(cset); - cset = next_cset; - } - if (boothowto & RB_ASKNAME) { - /* We don't auto-config... */ - } else { - /* They didn't ask, and we found something bootable... */ - -#if 0 - if (num_root == 1) { - booted_device = &raidrootdev[rootID]; - } else if (num_root > 1) { - /* we can't guess.. require the user to answer... 
*/ - boothowto |= RB_ASKNAME; - } -#endif - } -} - -int -raidctlopen(dev_t dev, int flags, int fmt, struct thread *td) -{ - struct raidctl_softc *parent_sc; - - parent_sc = dev->si_drv1; - - if ((parent_sc->sc_flags & RAIDF_OPEN) != 0) - return (EBUSY); - - parent_sc->sc_flags |= RAIDF_OPEN; - return (0); -} - -int -raidctlclose(dev_t dev, int flags, int fmt, struct thread *td) -{ - struct raidctl_softc *parent_sc; - - parent_sc = dev->si_drv1; - - parent_sc->sc_flags &= ~RAIDF_OPEN; - return (0); -} - -int -raidctlioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td) -{ - struct raidctl_softc *parent_sc; - struct raid_softc *sc; - RF_Config_t *u_cfg, *k_cfg; - RF_Raid_t *raidPtr; - u_char *specific_buf; - u_int unit; - int retcode = 0; - - parent_sc = dev->si_drv1; - - switch (cmd) { - /* configure the system */ - case RAIDFRAME_CONFIGURE: - - /* copy-in the configuration information */ - /* data points to a pointer to the configuration structure */ - - u_cfg = *((RF_Config_t **) data); - RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); - if (k_cfg == NULL) { - return (ENOMEM); - } - retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, - sizeof(RF_Config_t)); - if (retcode) { - RF_Free(k_cfg, sizeof(RF_Config_t)); - rf_printf(2, "raidctlioctl: retcode=%d copyin.1\n", - retcode); - return (retcode); - } - /* allocate a buffer for the layout-specific data, and copy it - * in */ - if (k_cfg->layoutSpecificSize) { - if (k_cfg->layoutSpecificSize > 10000) { - /* sanity check */ - RF_Free(k_cfg, sizeof(RF_Config_t)); - return (EINVAL); - } - RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, - (u_char *)); - if (specific_buf == NULL) { - RF_Free(k_cfg, sizeof(RF_Config_t)); - return (ENOMEM); - } - retcode = copyin(k_cfg->layoutSpecific, - (caddr_t) specific_buf, - k_cfg->layoutSpecificSize); - if (retcode) { - RF_Free(specific_buf, - k_cfg->layoutSpecificSize); - RF_Free(k_cfg, sizeof(RF_Config_t)); - rf_printf(2, "raidctlioctl: retcode=%d " - 
"copyin.2\n", retcode); - return (retcode); - } - } else - specific_buf = NULL; - k_cfg->layoutSpecific = specific_buf; - - /* should do some kind of sanity check on the configuration. - * Store the sum of all the bytes in the last byte? */ - - /* configure the system */ - - RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *)); - if (raidPtr == NULL) { - rf_printf(0, "No memory for raid device\n"); - RF_Free(k_cfg, sizeof(RF_Config_t)); - retcode = ENOMEM; - } - bzero((char *) raidPtr, sizeof(RF_Raid_t)); - - /* Request a unit number for this soon-to-be device. */ - unit = raidgetunit(parent_sc, 0); - if (unit == -1) { - rf_printf(0, "Cannot allocate raid unit\n"); - RF_Free(raidPtr, sizeof(*raidPtr)); - goto out; - } - raidPtr->raidid = unit; - - if ((retcode = rf_Configure(raidPtr, k_cfg, NULL)) == 0) { - - /* allow this many simultaneous IO's to - this RAID device */ - raidPtr->openings = RAIDOUTSTANDING; - - parent_sc->sc_raiddevs[unit] = raidinit(raidPtr); - if (parent_sc->sc_raiddevs[unit] == NULL) { - rf_printf(0, "Could not create raid device\n"); - RF_Free(raidPtr, sizeof(*raidPtr)); - goto out; - } - parent_sc->sc_numraid++; - ((struct raid_softc *)raidPtr->sc)->sc_parent_dev = dev; - rf_markalldirty(raidPtr); - } else { - parent_sc->sc_raiddevs[unit] = NULL; - RF_Free(raidPtr, sizeof(*raidPtr)); - } - -out: - /* free the buffers. No return code here. */ - if (k_cfg->layoutSpecificSize) { - RF_Free(specific_buf, k_cfg->layoutSpecificSize); - } - RF_Free(k_cfg, sizeof(RF_Config_t)); - break; - - case RAIDFRAME_SHUTDOWN: - - unit = *(u_int *)data; - if ((unit >= RF_MAX_ARRAYS) || - (parent_sc->sc_raiddevs[unit] == NULL)) - return (EINVAL); - - sc = parent_sc->sc_raiddevs[unit]; - if ((retcode = raidlock(sc)) != 0) - return (retcode); - - /* - * If somebody has a partition mounted, we shouldn't - * shutdown. 
- */ - - if ((sc->sc_flags & RAIDF_OPEN) != 0) { - raidunlock(sc); - return (EBUSY); - } - - rf_printf(0, "Shutting down RAIDframe engine\n"); - retcode = rf_Shutdown(sc->raidPtr); - RF_THREADGROUP_WAIT_STOP(&sc->raidPtr->engine_tg); - - disk_destroy(sc->sc_disk); - raidunlock(sc); - - /* XXX Need to be able to destroy the zone */ - uma_zdestroy(sc->sc_cbufpool); - - parent_sc->sc_numraid--; - parent_sc->sc_raiddevs[unit] = NULL; - - RF_Free(sc->raidPtr, sizeof(*raidPtr)); - RF_Free(sc, sizeof(*sc)); - - break; - - default: - retcode = ENOIOCTL; - } - - return (retcode); -} - -/* ARGSUSED */ -int -raidopen(struct disk *dp) -{ - struct raid_softc *sc; - int error = 0; - - sc = dp->d_drv1; - - if ((error = raidlock(sc)) != 0) - return (error); - dp = sc->sc_disk; - - rf_printf(1, "Opening raid device %s%d\n", dp->d_name, dp->d_unit); - - /* Generate overall disklabel */ - raidgetdefaultlabel(sc->raidPtr, sc, dp); - - if (sc->sc_busycount == 0) { - /* First one... mark things as dirty... Note that we *MUST* - have done a configure before this. I DO NOT WANT TO BE - SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED - THAT THEY BELONG TOGETHER!!!!! */ - /* XXX should check to see if we're only open for reading - here... If so, we needn't do this, but then need some - other way of keeping track of what's happened.. */ - - rf_markalldirty( sc->raidPtr ); - sc->sc_flags |= RAIDF_OPEN; - } - - /* Prevent this unit from being unconfigured while open. 
*/ - sc->sc_busycount++; - - raidunlock(sc); - - return (error); - - -} -/* ARGSUSED */ -int -raidclose(struct disk *dp) -{ - struct raid_softc *sc; - int error = 0; - - sc = dp->d_drv1; - - if ((error = raidlock(sc)) != 0) - return (error); - - sc->sc_busycount--; - if (sc->sc_busycount == 0) { - sc->sc_flags &= ~RAIDF_OPEN; - rf_update_component_labels(sc->raidPtr, - RF_FINAL_COMPONENT_UPDATE); - } - - raidunlock(sc); - return (0); - -} - -void -raidstrategy(bp) - struct bio *bp; -{ - RF_Raid_t *raidPtr; - struct raid_softc *sc = bp->bio_disk->d_drv1; - int s; - - raidPtr = sc->raidPtr; - if (raidPtr == NULL) { - bp->bio_error = ENODEV; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - return; - } - if (!raidPtr->valid) { - bp->bio_error = ENODEV; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - return; - } - if (bp->bio_bcount == 0) { - rf_printf(2, "b_bcount is zero..\n"); - biodone(bp); - return; - } - - s = splbio(); - - bp->bio_resid = 0; - - /* stuff it onto our queue. XXX locking? 
*/ - bioq_insert_tail(&sc->bio_queue, bp); - - raidstart(raidPtr); - - splx(s); -} - -int -raidioctl(dp, cmd, data, flag, td) - struct disk *dp; - u_long cmd; - void *data; - int flag; - struct thread *td; -{ - struct raid_softc *sc; - RF_Raid_t *raidPtr; - RF_RaidDisk_t *diskPtr; - RF_AccTotals_t *totals; - RF_DeviceConfig_t *d_cfg, **ucfgp; - struct rf_recon_req *rrcopy, *rr; - RF_ComponentLabel_t *clabel; - RF_ComponentLabel_t *ci_label; - RF_SingleComponent_t *sparePtr,*componentPtr; - RF_SingleComponent_t *hot_spare, *component; - RF_ProgressInfo_t progressInfo; - int retcode = 0; - int row, column; - int unit; - int i, j, d; - - sc = dp->d_drv1; - raidPtr = sc->raidPtr; - - rf_printf(2, "raidioctl: %s%d %ld\n", dp->d_name, dp->d_unit, cmd); - - switch (cmd) { - - case RAIDFRAME_GET_COMPONENT_LABEL: - /* need to read the component label for the disk indicated - by row,column in clabel */ - - /* For practice, let's get it directly fromdisk, rather - than from the in-core copy */ - RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), - (RF_ComponentLabel_t *)); - if (clabel == NULL) - return (ENOMEM); - - bzero((char *) clabel, sizeof(RF_ComponentLabel_t)); - - bcopy(data, clabel, sizeof(RF_ComponentLabel_t)); - - row = clabel->row; - column = clabel->column; - - if ((row < 0) || (row >= raidPtr->numRow) || - (column < 0) || (column >= raidPtr->numCol + - raidPtr->numSpare)) { - RF_Free( clabel, sizeof(RF_ComponentLabel_t)); - return(EINVAL); - } - - raidread_component_label(raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - clabel ); - - bcopy(clabel, data, sizeof(RF_ComponentLabel_t)); - RF_Free( clabel, sizeof(RF_ComponentLabel_t)); - return (retcode); - - case RAIDFRAME_SET_COMPONENT_LABEL: - clabel = (RF_ComponentLabel_t *) data; - - /* XXX check the label for valid stuff... */ - /* Note that some things *should not* get modified -- - the user should be re-initing the labels instead of - trying to patch things. 
- */ - - rf_printf(1, "Got component label:\n"); - rf_printf(1, "Version: %d\n",clabel->version); - rf_printf(1, "Serial Number: %d\n",clabel->serial_number); - rf_printf(1, "Mod counter: %d\n",clabel->mod_counter); - rf_printf(1, "Row: %d\n", clabel->row); - rf_printf(1, "Column: %d\n", clabel->column); - rf_printf(1, "Num Rows: %d\n", clabel->num_rows); - rf_printf(1, "Num Columns: %d\n", clabel->num_columns); - rf_printf(1, "Clean: %d\n", clabel->clean); - rf_printf(1, "Status: %d\n", clabel->status); - - row = clabel->row; - column = clabel->column; - - if ((row < 0) || (row >= raidPtr->numRow) || - (column < 0) || (column >= raidPtr->numCol)) { - return(EINVAL); - } - - /* XXX this isn't allowed to do anything for now :-) */ - - /* XXX and before it is, we need to fill in the rest - of the fields!?!?!?! */ -#if 0 - raidwrite_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - clabel ); -#endif - return (0); - - case RAIDFRAME_INIT_LABELS: - MALLOC(ci_label, RF_ComponentLabel_t *, - sizeof(RF_ComponentLabel_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - clabel = (RF_ComponentLabel_t *) data; - /* - we only want the serial number from - the above. We get all the rest of the information - from the config that was used to create this RAID - set. 
- */ - - raidPtr->serial_number = clabel->serial_number; - - raid_init_component_label(raidPtr, ci_label); - ci_label->serial_number = clabel->serial_number; - - for(row=0;row<raidPtr->numRow;row++) { - ci_label->row = row; - for(column=0;column<raidPtr->numCol;column++) { - diskPtr = &raidPtr->Disks[row][column]; - if (!RF_DEAD_DISK(diskPtr->status)) { - ci_label->partitionSize = - diskPtr->partitionSize; - ci_label->column = column; - raidwrite_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - ci_label ); - } - } - } - - FREE(ci_label, M_RAIDFRAME); - return (retcode); - case RAIDFRAME_SET_AUTOCONFIG: - d = rf_set_autoconfig(raidPtr, *(int *) data); - rf_printf(1, "New autoconfig value is: %d\n", d); - *(int *) data = d; - return (retcode); - - case RAIDFRAME_SET_ROOT: - d = rf_set_rootpartition(raidPtr, *(int *) data); - rf_printf(1, "New rootpartition value is: %d\n", d); - *(int *) data = d; - return (retcode); - - /* initialize all parity */ - case RAIDFRAME_REWRITEPARITY: - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Parity for RAID 0 is trivially correct */ - raidPtr->parity_good = RF_RAID_CLEAN; - return(0); - } - - if (raidPtr->parity_rewrite_in_progress == 1) { - /* Re-write is already in progress! 
*/ - return(EINVAL); - } - - retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, - rf_RewriteParityThread, - raidPtr,"raid_parity"); - return (retcode); - - - case RAIDFRAME_ADD_HOT_SPARE: - MALLOC(hot_spare, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - sparePtr = (RF_SingleComponent_t *) data; - memcpy( hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); - retcode = rf_add_hot_spare(raidPtr, hot_spare); - FREE(hot_spare, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_REMOVE_HOT_SPARE: - return(retcode); - - case RAIDFRAME_DELETE_COMPONENT: - MALLOC(component, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - componentPtr = (RF_SingleComponent_t *)data; - memcpy( component, componentPtr, - sizeof(RF_SingleComponent_t)); - retcode = rf_delete_component(raidPtr, component); - FREE(component, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_INCORPORATE_HOT_SPARE: - MALLOC(component, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - componentPtr = (RF_SingleComponent_t *)data; - memcpy( component, componentPtr, - sizeof(RF_SingleComponent_t)); - retcode = rf_incorporate_hot_spare(raidPtr, component); - FREE(component, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_REBUILD_IN_PLACE: - - MALLOC(component, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Can't do this on a RAID 0!! */ - FREE(component, M_RAIDFRAME); - return(EINVAL); - } - - if (raidPtr->recon_in_progress == 1) { - /* a reconstruct is already in progress! 
*/ - FREE(component, M_RAIDFRAME); - return(EINVAL); - } - - componentPtr = (RF_SingleComponent_t *) data; - memcpy( component, componentPtr, - sizeof(RF_SingleComponent_t)); - row = component->row; - column = component->column; - unit = raidPtr->raidid; - rf_printf(0, "raid%d Rebuild: %d %d\n", unit, row, column); - if ((row < 0) || (row >= raidPtr->numRow) || - (column < 0) || (column >= raidPtr->numCol)) { - FREE(component, M_RAIDFRAME); - return(EINVAL); - } - - RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); - if (rrcopy == NULL) { - FREE(component, M_RAIDFRAME); - return(ENOMEM); - } - - rrcopy->raidPtr = (void *) raidPtr; - rrcopy->row = row; - rrcopy->col = column; - - retcode = RF_CREATE_THREAD(raidPtr->recon_thread, - rf_ReconstructInPlaceThread, - rrcopy,"raid_reconip"); - FREE(component, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_GET_UNIT: - - *(int *)data = raidPtr->raidid; - return (0); - - case RAIDFRAME_GET_INFO: - if (!raidPtr->valid) - return (ENODEV); - ucfgp = (RF_DeviceConfig_t **) data; - RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), - (RF_DeviceConfig_t *)); - if (d_cfg == NULL) - return (ENOMEM); - bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t)); - d_cfg->rows = raidPtr->numRow; - d_cfg->cols = raidPtr->numCol; - d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol; - if (d_cfg->ndevs >= RF_MAX_DISKS) { - RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); - return (ENOMEM); - } - d_cfg->nspares = raidPtr->numSpare; - if (d_cfg->nspares >= RF_MAX_DISKS) { - RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); - return (ENOMEM); - } - d_cfg->maxqdepth = raidPtr->maxQueueDepth; - d = 0; - for (i = 0; i < d_cfg->rows; i++) { - for (j = 0; j < d_cfg->cols; j++) { - d_cfg->devs[d] = raidPtr->Disks[i][j]; - d++; - } - } - for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { - d_cfg->spares[i] = raidPtr->Disks[0][j]; - } - - retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); - - RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); - - return 
(retcode); - - case RAIDFRAME_CHECK_PARITY: - *(int *) data = raidPtr->parity_good; - return (0); - - case RAIDFRAME_RESET_ACCTOTALS: - bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals)); - return (0); - - case RAIDFRAME_GET_ACCTOTALS: - totals = (RF_AccTotals_t *) data; - *totals = raidPtr->acc_totals; - return (0); - - case RAIDFRAME_KEEP_ACCTOTALS: - raidPtr->keep_acc_totals = *(int *)data; - return (0); - - case RAIDFRAME_GET_SIZE: - *(int *) data = raidPtr->totalSectors; - return (0); - - /* fail a disk & optionally start reconstruction */ - case RAIDFRAME_FAIL_DISK: - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Can't do this on a RAID 0!! */ - return(EINVAL); - } - - rr = (struct rf_recon_req *) data; - - if (rr->row < 0 || rr->row >= raidPtr->numRow - || rr->col < 0 || rr->col >= raidPtr->numCol) - return (EINVAL); - - rf_printf(0, "%s%d: Failing the disk: row: %d col: %d\n", - dp->d_name, dp->d_unit, rr->row, rr->col); - - /* make a copy of the recon request so that we don't rely on - * the user's buffer */ - RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); - if (rrcopy == NULL) - return(ENOMEM); - bcopy(rr, rrcopy, sizeof(*rr)); - rrcopy->raidPtr = (void *) raidPtr; - - retcode = RF_CREATE_THREAD(raidPtr->recon_thread, - rf_ReconThread, - rrcopy,"raid_recon"); - return (0); - - /* invoke a copyback operation after recon on whatever disk - * needs it, if any */ - case RAIDFRAME_COPYBACK: - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0!! */ - return(EINVAL); - } - - if (raidPtr->copyback_in_progress == 1) { - /* Copyback is already in progress! 
*/ - return(EINVAL); - } - - retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, - rf_CopybackThread, - raidPtr,"raid_copyback"); - return (retcode); - - /* return the percentage completion of reconstruction */ - case RAIDFRAME_CHECK_RECON_STATUS: - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0, so tell the - user it's done. */ - *(int *) data = 100; - return(0); - } - row = 0; /* XXX we only consider a single row... */ - if (raidPtr->status[row] != rf_rs_reconstructing) - *(int *) data = 100; - else - *(int *) data = raidPtr->reconControl[row]->percentComplete; - return (0); - case RAIDFRAME_CHECK_RECON_STATUS_EXT: - row = 0; /* XXX we only consider a single row... */ - if (raidPtr->status[row] != rf_rs_reconstructing) { - progressInfo.remaining = 0; - progressInfo.completed = 100; - progressInfo.total = 100; - } else { - progressInfo.total = - raidPtr->reconControl[row]->numRUsTotal; - progressInfo.completed = - raidPtr->reconControl[row]->numRUsComplete; - progressInfo.remaining = progressInfo.total - - progressInfo.completed; - } - bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); - return (retcode); - - case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0, so tell the - user it's done. 
*/ - *(int *) data = 100; - return(0); - } - if (raidPtr->parity_rewrite_in_progress == 1) { - *(int *) data = 100 * - raidPtr->parity_rewrite_stripes_done / - raidPtr->Layout.numStripe; - } else { - *(int *) data = 100; - } - return (0); - - case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: - if (raidPtr->parity_rewrite_in_progress == 1) { - progressInfo.total = raidPtr->Layout.numStripe; - progressInfo.completed = - raidPtr->parity_rewrite_stripes_done; - progressInfo.remaining = progressInfo.total - - progressInfo.completed; - } else { - progressInfo.remaining = 0; - progressInfo.completed = 100; - progressInfo.total = 100; - } - bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); - return (retcode); - - case RAIDFRAME_CHECK_COPYBACK_STATUS: - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ - *(int *) data = 100; - return(0); - } - if (raidPtr->copyback_in_progress == 1) { - *(int *) data = 100 * raidPtr->copyback_stripes_done / - raidPtr->Layout.numStripe; - } else { - *(int *) data = 100; - } - return (0); - - case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: - if (raidPtr->copyback_in_progress == 1) { - progressInfo.total = raidPtr->Layout.numStripe; - progressInfo.completed = - raidPtr->copyback_stripes_done; - progressInfo.remaining = progressInfo.total - - progressInfo.completed; - } else { - progressInfo.remaining = 0; - progressInfo.completed = 100; - progressInfo.total = 100; - } - bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); - return (retcode); - - /* the sparetable daemon calls this to wait for the kernel to - * need a spare table. this ioctl does not return until a - * spare table is needed. XXX -- calling mpsleep here in the - * ioctl code is almost certainly wrong and evil. 
-- XXX XXX - * -- I should either compute the spare table in the kernel, - * or have a different -- XXX XXX -- interface (a different - * character device) for delivering the table -- XXX */ -#if 0 - case RAIDFRAME_SPARET_WAIT: - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - while (!rf_sparet_wait_queue) - mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); - waitreq = rf_sparet_wait_queue; - rf_sparet_wait_queue = rf_sparet_wait_queue->next; - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - /* structure assignment */ - *((RF_SparetWait_t *) data) = *waitreq; - - RF_Free(waitreq, sizeof(*waitreq)); - return (0); - - /* wakes up a process waiting on SPARET_WAIT and puts an error - * code in it that will cause the dameon to exit */ - case RAIDFRAME_ABORT_SPARET_WAIT: - RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); - waitreq->fcol = -1; - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - waitreq->next = rf_sparet_wait_queue; - rf_sparet_wait_queue = waitreq; - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - wakeup(&rf_sparet_wait_queue); - return (0); - - /* used by the spare table daemon to deliver a spare table - * into the kernel */ - case RAIDFRAME_SEND_SPARET: - - /* install the spare table */ - retcode = rf_SetSpareTable(raidPtr, *(void **) data); - - /* respond to the requestor. the return status of the spare - * table installation is passed in the "fcol" field */ - RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); - waitreq->fcol = retcode; - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - waitreq->next = rf_sparet_resp_queue; - rf_sparet_resp_queue = waitreq; - wakeup(&rf_sparet_resp_queue); - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - return (retcode); -#endif - - default: - retcode = ENOIOCTL; - break; /* fall through to the os-specific code below */ - - } - - return (retcode); - -} - - -/* raidinit -- complete the rest of the initialization for the - RAIDframe device. 
*/ - - -static struct raid_softc * -raidinit(raidPtr) - RF_Raid_t *raidPtr; -{ - struct raid_softc *sc; - - RF_Malloc(sc, sizeof(struct raid_softc), (struct raid_softc *)); - if (sc == NULL) { - rf_printf(1, "No memory for raid device\n"); - return(NULL); - } - - sc->raidPtr = raidPtr; - - /* XXX Should check return code here */ - bioq_init(&sc->bio_queue); - sc->sc_cbufpool = uma_zcreate("raidpl", sizeof(struct raidbuf), NULL, - NULL, NULL, NULL, 0, 0); - - /* XXX There may be a weird interaction here between this, and - * protectedSectors, as used in RAIDframe. */ - - sc->sc_size = raidPtr->totalSectors; - - /* Create the disk device */ - sc->sc_disk = disk_alloc(); - sc->sc_disk->d_open = raidopen; - sc->sc_disk->d_close = raidclose; - sc->sc_disk->d_ioctl = raidioctl; - sc->sc_disk->d_strategy = raidstrategy; - sc->sc_disk->d_drv1 = sc; - sc->sc_disk->d_maxsize = DFLTPHYS; - sc->sc_disk->d_name = "raid"; - sc->sc_disk->d_unit = raidPtr->raidid; - sc->sc_disk->d_flags = DISKFLAG_NEEDSGIANT; - disk_create(sc->sc_disk, DISK_VERSION); - raidPtr->sc = sc; - - return (sc); -} - -/* wake up the daemon & tell it to get us a spare table - * XXX - * the entries in the queues should be tagged with the raidPtr - * so that in the extremely rare case that two recons happen at once, - * we know for which device were requesting a spare table - * XXX - * - * XXX This code is not currently used. 
GO - */ -int -rf_GetSpareTableFromDaemon(req) - RF_SparetWait_t *req; -{ - int retcode; - - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - req->next = rf_sparet_wait_queue; - rf_sparet_wait_queue = req; - wakeup(&rf_sparet_wait_queue); - - /* mpsleep unlocks the mutex */ - while (!rf_sparet_resp_queue) { - tsleep(&rf_sparet_resp_queue, PRIBIO, - "raidframe getsparetable", 0); - } - req = rf_sparet_resp_queue; - rf_sparet_resp_queue = req->next; - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - retcode = req->fcol; - RF_Free(req, sizeof(*req)); /* this is not the same req as we - * alloc'd */ - return (retcode); -} - -/* a wrapper around rf_DoAccess that extracts appropriate info from the - * bp & passes it down. - * any calls originating in the kernel must use non-blocking I/O - * do some extra sanity checking to return "appropriate" error values for - * certain conditions (to make some standard utilities work) - * - * Formerly known as: rf_DoAccessKernel - */ -void -raidstart(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_SectorCount_t num_blocks, pb, sum; - RF_RaidAddr_t raid_addr; - struct raid_softc *sc; - struct bio *bp; - daddr_t blocknum; - int unit, retcode, do_async; - - unit = raidPtr->raidid; - sc = raidPtr->sc; - - /* quick check to see if anything has died recently */ - RF_LOCK_MUTEX(raidPtr->mutex); - if (raidPtr->numNewFailures > 0) { - raidPtr->numNewFailures--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - rf_update_component_labels(raidPtr, - RF_NORMAL_COMPONENT_UPDATE); - } else - RF_UNLOCK_MUTEX(raidPtr->mutex); - - /* Check to see if we're at the limit... */ - RF_LOCK_MUTEX(raidPtr->mutex); - while (raidPtr->openings > 0) { - RF_UNLOCK_MUTEX(raidPtr->mutex); - - /* get the next item, if any, from the queue */ - if ((bp = bioq_first(&sc->bio_queue)) == NULL) { - /* nothing more to do */ - return; - } - bioq_remove(&sc->bio_queue, bp); - - /* Ok, for the bp we have here, bp->b_blkno is relative to the - * partition.. Need to make it absolute to the underlying - * device.. 
*/ - - blocknum = bp->bio_pblkno = - bp->bio_offset >> raidPtr->logBytesPerSector; - - rf_printf(3, "Blocks: %ld, %ld\n", (long)bp->bio_pblkno, (long)blocknum); - - rf_printf(3, "bp->bio_bcount = %d\n", (int) bp->bio_bcount); - rf_printf(3, "bp->bio_resid = %d\n", (int) bp->bio_resid); - - /* *THIS* is where we adjust what block we're going to... - * but DO NOT TOUCH bp->bio_pblkno!!! */ - raid_addr = blocknum; - - num_blocks = bp->bio_bcount >> raidPtr->logBytesPerSector; - pb = (bp->bio_bcount & raidPtr->sectorMask) ? 1 : 0; - sum = raid_addr + num_blocks + pb; - if (rf_debugKernelAccess) { - rf_printf(0, "raid_addr=0x%x sum=%d num_blocks=%d(+%d) " - "(%d)\n", (int)raid_addr, (int)sum, - (int)num_blocks, (int)pb, - (int)bp->bio_resid); - } - if ((sum > raidPtr->totalSectors) || (sum < raid_addr) - || (sum < num_blocks) || (sum < pb)) { - bp->bio_error = ENOSPC; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - RF_LOCK_MUTEX(raidPtr->mutex); - continue; - } - /* - * XXX rf_DoAccess() should do this, not just DoAccessKernel() - */ - - if (bp->bio_bcount & raidPtr->sectorMask) { - bp->bio_error = EINVAL; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - RF_LOCK_MUTEX(raidPtr->mutex); - continue; - - } - rf_printf(3, "Calling DoAccess..\n"); - - - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->openings--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - - /* - * Everything is async. - */ - do_async = 1; - - /* XXX we're still at splbio() here... do we *really* - need to be? */ - - /* don't ever condition on bp->bio_cmd & BIO_WRITE. - * always condition on BIO_READ instead */ - - retcode = rf_DoAccess(raidPtr, (bp->bio_cmd & BIO_READ) ? - RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, - do_async, raid_addr, num_blocks, - bp->bio_data, bp, NULL, NULL, - RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); - - - RF_LOCK_MUTEX(raidPtr->mutex); - } - RF_UNLOCK_MUTEX(raidPtr->mutex); -} - - - - -/* invoke an I/O from kernel mode. 
Disk queue should be locked upon entry */ - -int -rf_DispatchKernelIO(queue, req) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; -{ - int op = (req->type == RF_IO_TYPE_READ) ? BIO_READ : BIO_WRITE; - struct bio *bp; - struct raidbuf *raidbp = NULL; - struct raid_softc *sc; - - /* XXX along with the vnode, we also need the softc associated with - * this device.. */ - - req->queue = queue; - - sc = queue->raidPtr->sc; - - rf_printf(3, "DispatchKernelIO %s\n", sc->sc_disk->d_name); - - bp = req->bp; -#if 1 - /* XXX when there is a physical disk failure, someone is passing us a - * buffer that contains old stuff!! Attempt to deal with this problem - * without taking a performance hit... (not sure where the real bug - * is. It's buried in RAIDframe somewhere) :-( GO ) */ - - if (bp->bio_flags & BIO_ERROR) { - bp->bio_flags &= ~BIO_ERROR; - } - if (bp->bio_error != 0) { - bp->bio_error = 0; - } -#endif - raidbp = RAIDGETBUF(sc); - - raidbp->rf_flags = 0; /* XXX not really used anywhere... */ - - /* - * context for raidiodone - */ - raidbp->rf_obp = bp; - raidbp->req = req; - -#if 0 /* XXX */ - LIST_INIT(&raidbp->rf_buf.b_dep); -#endif - - switch (req->type) { - case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ - /* XXX need to do something extra here.. */ - /* I'm leaving this in, as I've never actually seen it used, - * and I'd like folks to report it... GO */ - rf_printf(2, "WAKEUP CALLED\n"); - queue->numOutstanding++; - - /* XXX need to glue the original buffer into this? 
*/ - - KernelWakeupFunc(&raidbp->rf_buf); - break; - - case RF_IO_TYPE_READ: - case RF_IO_TYPE_WRITE: - - if (req->tracerec) { - RF_ETIMER_START(req->tracerec->timer); - } - InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, - op | bp->bio_cmd, queue->rf_cinfo->ci_dev, - req->sectorOffset, req->numSector, - req->buf, KernelWakeupFunc, (void *) req, - queue->raidPtr->logBytesPerSector, req->b_proc); - - if (rf_debugKernelAccess) { - rf_printf(0, "dispatch: bp->bio_pblkno = %ld\n", - (long) bp->bio_pblkno); - } - queue->numOutstanding++; - queue->last_deq_sector = req->sectorOffset; - /* acc wouldn't have been let in if there were any pending - * reqs at any other priority */ - queue->curPriority = req->priority; - - rf_printf(3, "Going for %c to %s%d row %d col %d\n", - req->type, sc->sc_disk->d_name, - sc->sc_disk->d_unit, queue->row, queue->col); - rf_printf(3, "sector %d count %d (%d bytes) %d\n", - (int) req->sectorOffset, (int) req->numSector, - (int) (req->numSector << - queue->raidPtr->logBytesPerSector), - (int) queue->raidPtr->logBytesPerSector); -#if 0 /* XXX */ - if ((raidbp->rf_buf.bio_cmd & BIO_READ) == 0) { - raidbp->rf_buf.b_vp->v_numoutput++; - } -#endif - (*devsw(raidbp->rf_buf.bio_dev)->d_strategy)(&raidbp->rf_buf); - - break; - - default: - panic("bad req->type in rf_DispatchKernelIO"); - } - rf_printf(3, "Exiting from DispatchKernelIO\n"); - /* splx(s); */ /* want to test this */ - return (0); -} -/* This is the callback function associated with an I/O invoked from - kernel code. 
- */ -static void -KernelWakeupFunc(vbp) - struct bio *vbp; -{ - RF_DiskQueueData_t *req = NULL; - RF_DiskQueue_t *queue; - struct raidbuf *raidbp = (struct raidbuf *) vbp; - struct bio *bp; - struct raid_softc *sc; - int s; - - s = splbio(); - rf_printf(2, "recovering the request queue:\n"); - req = raidbp->req; - - bp = raidbp->rf_obp; - queue = (RF_DiskQueue_t *) req->queue; - sc = queue->raidPtr->sc; - - if (raidbp->rf_buf.bio_flags & BIO_ERROR) { - bp->bio_flags |= BIO_ERROR; - bp->bio_error = raidbp->rf_buf.bio_error ? - raidbp->rf_buf.bio_error : EIO; - } - - /* XXX methinks this could be wrong... */ -#if 1 - bp->bio_resid = raidbp->rf_buf.bio_resid; -#endif - - if (req->tracerec) { - RF_ETIMER_STOP(req->tracerec->timer); - RF_ETIMER_EVAL(req->tracerec->timer); - RF_LOCK_MUTEX(rf_tracing_mutex); - req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); - req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); - req->tracerec->num_phys_ios++; - RF_UNLOCK_MUTEX(rf_tracing_mutex); - } - bp->bio_bcount = raidbp->rf_buf.bio_bcount; /* XXXX ? */ - - /* XXX Ok, let's get aggressive... If BIO_ERROR is set, let's go - * ballistic, and mark the component as hosed... */ - - if (bp->bio_flags & BIO_ERROR) { - /* Mark the disk as dead */ - /* but only mark it once... */ - if (queue->raidPtr->Disks[queue->row][queue->col].status == - rf_ds_optimal) { - rf_printf(0, "%s%d: IO Error. Marking %s as " - "failed.\n", sc->sc_disk->d_name, sc->sc_disk->d_unit, - queue->raidPtr->Disks[queue->row][queue->col].devname); - queue->raidPtr->Disks[queue->row][queue->col].status = - rf_ds_failed; - queue->raidPtr->status[queue->row] = rf_rs_degraded; - queue->raidPtr->numFailures++; - queue->raidPtr->numNewFailures++; - } else { /* Disk is already dead... */ - /* printf("Disk already marked as dead!\n"); */ - } - - } - - RAIDPUTBUF(sc, raidbp); - - rf_DiskIOComplete(queue, req, (bp->bio_flags & BIO_ERROR) ? 
1 : 0); - (req->CompleteFunc)(req->argument, (bp->bio_flags & BIO_ERROR) ? 1 : 0); - - splx(s); -} - - - -/* - * initialize a buf structure for doing an I/O in the kernel. - */ -static void -InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, - logBytesPerSector, b_proc) - struct bio *bp; - struct vnode *b_vp; - unsigned rw_flag; - dev_t dev; - RF_SectorNum_t startSect; - RF_SectorCount_t numSect; - caddr_t buf; - void (*cbFunc) (struct bio *); - void *cbArg; - int logBytesPerSector; - struct proc *b_proc; -{ - bp->bio_cmd = rw_flag; - bp->bio_bcount = numSect << logBytesPerSector; -#if 0 /* XXX */ - bp->bio_bufsize = bp->bio_bcount; -#endif - bp->bio_error = 0; - bp->bio_dev = dev; - bp->bio_data = buf; - bp->bio_resid = bp->bio_bcount; /* XXX is this right!?!?!! */ - bp->bio_offset = startSect << logBytesPerSector; - if (bp->bio_bcount == 0) { - panic("bp->bio_bcount is zero in InitBP!!\n"); - } -/* - bp->b_proc = b_proc; - bp->b_vp = b_vp; -*/ - bp->bio_done = cbFunc; - -} - -static void -raidgetdefaultlabel(raidPtr, sc, dp) - RF_Raid_t *raidPtr; - struct raid_softc *sc; - struct disk *dp; -{ - rf_printf(1, "Building a default label...\n"); - if (dp == NULL) - panic("raidgetdefaultlabel(): dp is NULL\n"); - - /* fabricate a label... */ - dp->d_mediasize = raidPtr->totalSectors * raidPtr->bytesPerSector; - dp->d_sectorsize = raidPtr->bytesPerSector; - dp->d_fwsectors = raidPtr->Layout.dataSectorsPerStripe; - dp->d_fwheads = 4 * raidPtr->numCol; - -} -/* - * Lookup the provided name in the filesystem. If the file exists, - * is a valid block device, and isn't being used by anyone else, - * set *vpp to the file's vnode. 
- * You'll find the original of this in ccd.c - */ -int -raidlookup(path, td, vpp) - char *path; - struct thread *td; - struct vnode **vpp; /* result */ -{ - struct nameidata *nd; - struct vnode *vp; - struct vattr *va; - struct proc *p; - int error = 0, flags; - - MALLOC(nd, struct nameidata *, sizeof(struct nameidata), M_TEMP, M_NOWAIT | M_ZERO); - MALLOC(va, struct vattr *, sizeof(struct vattr), M_TEMP, M_NOWAIT | M_ZERO); - if ((nd == NULL) || (va == NULL)) { - printf("Out of memory?\n"); - return (ENOMEM); - } - - /* Sanity check the p_fd fields. This is really just a hack */ - p = td->td_proc; - if (!p->p_fd->fd_rdir || !p->p_fd->fd_cdir) - printf("Warning: p_fd fields not set\n"); - - if (!td->td_proc->p_fd->fd_rdir) - p->p_fd->fd_rdir = rootvnode; - - if (!p->p_fd->fd_cdir) - p->p_fd->fd_cdir = rootvnode; - - NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, curthread); - flags = FREAD | FWRITE; - if ((error = vn_open(nd, &flags, 0, -1)) != 0) { - rf_printf(2, "RAIDframe: vn_open returned %d\n", error); - goto end1; - } - vp = nd->ni_vp; - if (vp->v_usecount > 1) { - rf_printf(1, "raidlookup() vp->v_usecount= %d\n", vp->v_usecount); - error = EBUSY; - goto end; - } - if ((error = VOP_GETATTR(vp, va, td->td_ucred, td)) != 0) { - rf_printf(1, "raidlookup() VOP_GETATTR returned %d", error); - goto end; - } - /* XXX: eventually we should handle VREG, too. */ - if (va->va_type != VCHR) { - rf_printf(1, "Returning ENOTBLK\n"); - error = ENOTBLK; - } - *vpp = vp; - -end: - VOP_UNLOCK(vp, 0, td); - NDFREE(nd, NDF_ONLY_PNBUF); -end1: - FREE(nd, M_TEMP); - FREE(va, M_TEMP); - return (error); -} -/* - * Wait interruptibly for an exclusive lock. - * - * XXX - * Several drivers do this; it should be abstracted and made MP-safe. - * (Hmm... 
where have we seen this warning before :-> GO ) - */ -static int -raidlock(sc) - struct raid_softc *sc; -{ - int error; - - while ((sc->sc_flags & RAIDF_LOCKED) != 0) { - sc->sc_flags |= RAIDF_WANTED; - if ((error = - tsleep(sc, PRIBIO | PCATCH, "raidlck", 0)) != 0) - return (error); - } - sc->sc_flags |= RAIDF_LOCKED; - return (0); -} -/* - * Unlock and wake up any waiters. - */ -static void -raidunlock(sc) - struct raid_softc *sc; -{ - - sc->sc_flags &= ~RAIDF_LOCKED; - if ((sc->sc_flags & RAIDF_WANTED) != 0) { - sc->sc_flags &= ~RAIDF_WANTED; - wakeup(sc); - } -} - - -#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ -#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ - -int -raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) -{ - RF_ComponentLabel_t *clabel; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - if (clabel == NULL) { - printf("raidmarkclean: Out of memory?\n"); - return (ENOMEM); - } - - raidread_component_label(dev, b_vp, clabel); - clabel->mod_counter = mod_counter; - clabel->clean = RF_RAID_CLEAN; - raidwrite_component_label(dev, b_vp, clabel); - FREE(clabel, M_RAIDFRAME); - return(0); -} - - -int -raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) -{ - RF_ComponentLabel_t *clabel; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - if (clabel == NULL) { - printf("raidmarkclean: Out of memory?\n"); - return (ENOMEM); - } - - raidread_component_label(dev, b_vp, clabel); - clabel->mod_counter = mod_counter; - clabel->clean = RF_RAID_DIRTY; - raidwrite_component_label(dev, b_vp, clabel); - FREE(clabel, M_RAIDFRAME); - return(0); -} - -/* ARGSUSED */ -int -raidread_component_label(dev, b_vp, clabel) - dev_t dev; - struct vnode *b_vp; - RF_ComponentLabel_t *clabel; -{ - struct buf *bp; - int error; - - /* XXX should probably ensure that we don't try to do this if - someone has changed rf_protected_sectors. 
*/ - - if (b_vp == NULL) { - /* For whatever reason, this component is not valid. - Don't try to read a component label from it. */ - return(EINVAL); - } - - /* get a block of the appropriate size... */ - bp = geteblk((int)RF_COMPONENT_INFO_SIZE); - bp->b_dev = dev; - - /* get our ducks in a row for the read */ - bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; - bp->b_iooffset = RF_COMPONENT_INFO_OFFSET; - bp->b_bcount = RF_COMPONENT_INFO_SIZE; - bp->b_iocmd = BIO_READ; - bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; - - DEV_STRATEGY(bp); - error = bufwait(bp); - - if (!error) { - memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t)); -#if 0 - rf_print_component_label( clabel ); -#endif - } else { -#if 0 - rf_printf(0, "Failed to read RAID component label!\n"); -#endif - } - - bp->b_flags |= B_INVAL | B_AGE; - brelse(bp); - return(error); -} -/* ARGSUSED */ -int -raidwrite_component_label(dev, b_vp, clabel) - dev_t dev; - struct vnode *b_vp; - RF_ComponentLabel_t *clabel; -{ - struct buf *bp; - int error; - - /* get a block of the appropriate size... 
*/ - bp = geteblk((int)RF_COMPONENT_INFO_SIZE); - bp->b_dev = dev; - - /* get our ducks in a row for the write */ - bp->b_flags = 0; - bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; - bp->b_iooffset = RF_COMPONENT_INFO_OFFSET; - bp->b_bcount = RF_COMPONENT_INFO_SIZE; - bp->b_iocmd = BIO_WRITE; - bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; - - memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); - - memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); - - DEV_STRATEGY(bp); - error = bufwait(bp); - - bp->b_flags |= B_INVAL | B_AGE; - brelse(bp); - if (error) { -#if 1 - rf_printf(0, "Failed to write RAID component info!\n"); - rf_printf(0, "b_error= %d\n", bp->b_error); -#endif - } - - return(error); -} - -void -rf_markalldirty(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ComponentLabel_t *clabel; - int r,c; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - - if (clabel == NULL) { - printf("rf_markalldirty: Out of memory?\n"); - return; - } - - raidPtr->mod_counter++; - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - /* we don't want to touch (at all) a disk that has - failed */ - if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { - raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); - if (clabel->status == rf_ds_spared) { - /* XXX do something special... - but whatever you do, don't - try to access it!! 
*/ - } else { -#if 0 - clabel->status = - raidPtr->Disks[r][c].status; - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); -#endif - raidmarkdirty( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); - } - } - } - } - /* printf("Component labels marked dirty.\n"); */ -#if 0 - for( c = 0; c < raidPtr->numSpare ; c++) { - sparecol = raidPtr->numCol + c; - if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { - /* - - XXX this is where we get fancy and map this spare - into it's correct spot in the array. - - */ - /* - - we claim this disk is "optimal" if it's - rf_ds_used_spare, as that means it should be - directly substitutable for the disk it replaced. - We note that too... - - */ - - for(i=0;i<raidPtr->numRow;i++) { - for(j=0;j<raidPtr->numCol;j++) { - if ((raidPtr->Disks[i][j].spareRow == - r) && - (raidPtr->Disks[i][j].spareCol == - sparecol)) { - srow = r; - scol = sparecol; - break; - } - } - } - - raidread_component_label( - raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp, - &clabel); - /* make sure status is noted */ - clabel.version = RF_COMPONENT_LABEL_VERSION; - clabel.mod_counter = raidPtr->mod_counter; - clabel.serial_number = raidPtr->serial_number; - clabel.row = srow; - clabel.column = scol; - clabel.num_rows = raidPtr->numRow; - clabel.num_columns = raidPtr->numCol; - clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ - clabel.status = rf_ds_optimal; - raidwrite_component_label( - raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp, - &clabel); - raidmarkclean( raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp); - } - } - -#endif - FREE(clabel, M_RAIDFRAME); -} - - -void -rf_update_component_labels(raidPtr, final) - RF_Raid_t *raidPtr; - int final; -{ - RF_ComponentLabel_t *clabel; - int sparecol; - int r,c; - int i,j; - int srow, scol; - - srow = -1; - scol = -1; - - MALLOC(clabel, 
RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - if (clabel == NULL) { - printf("rf_update_component_labels: Out of memory?\n"); - return; - } - - /* XXX should do extra checks to make sure things really are clean, - rather than blindly setting the clean bit... */ - - raidPtr->mod_counter++; - - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - if (raidPtr->Disks[r][c].status == rf_ds_optimal) { - raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); - /* make sure status is noted */ - clabel->status = rf_ds_optimal; - /* bump the counter */ - clabel->mod_counter = raidPtr->mod_counter; - - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); - if (final == RF_FINAL_COMPONENT_UPDATE) { - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); - } - } - } - /* else we don't touch it.. */ - } - } - - for( c = 0; c < raidPtr->numSpare ; c++) { - sparecol = raidPtr->numCol + c; - if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { - /* - - we claim this disk is "optimal" if it's - rf_ds_used_spare, as that means it should be - directly substitutable for the disk it replaced. - We note that too... - - */ - - for(i=0;i<raidPtr->numRow;i++) { - for(j=0;j<raidPtr->numCol;j++) { - if ((raidPtr->Disks[i][j].spareRow == - 0) && - (raidPtr->Disks[i][j].spareCol == - sparecol)) { - srow = i; - scol = j; - break; - } - } - } - - /* XXX shouldn't *really* need this... 
*/ - raidread_component_label( - raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - clabel); - /* make sure status is noted */ - - raid_init_component_label(raidPtr, clabel); - - clabel->mod_counter = raidPtr->mod_counter; - clabel->row = srow; - clabel->column = scol; - clabel->status = rf_ds_optimal; - - raidwrite_component_label( - raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - clabel); - if (final == RF_FINAL_COMPONENT_UPDATE) { - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - raidPtr->mod_counter); - } - } - } - } - FREE(clabel, M_RAIDFRAME); - rf_printf(1, "Component labels updated\n"); -} - -void -rf_close_component(raidPtr, vp, auto_configured) - RF_Raid_t *raidPtr; - struct vnode *vp; - int auto_configured; -{ - struct thread *td; - - td = raidPtr->engine_thread; - - if (vp != NULL) { - if (auto_configured == 1) { - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - - vrele(vp); - } else { - vn_close(vp, FREAD | FWRITE, td->td_ucred, td); - } - } else { - rf_printf(1, "vnode was NULL\n"); - } -} - - -void -rf_UnconfigureVnodes(raidPtr) - RF_Raid_t *raidPtr; -{ - int r,c; - struct thread *td; - struct vnode *vp; - int acd; - - - /* We take this opportunity to close the vnodes like we should.. 
*/ - - td = raidPtr->engine_thread; - - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - rf_printf(1, "Closing vnode for row: %d col: %d\n", r, c); - vp = raidPtr->raid_cinfo[r][c].ci_vp; - acd = raidPtr->Disks[r][c].auto_configured; - rf_close_component(raidPtr, vp, acd); - raidPtr->raid_cinfo[r][c].ci_vp = NULL; - raidPtr->Disks[r][c].auto_configured = 0; - } - } - for (r = 0; r < raidPtr->numSpare; r++) { - rf_printf(1, "Closing vnode for spare: %d\n", r); - vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; - acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; - rf_close_component(raidPtr, vp, acd); - raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; - raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; - } -} - - -void -rf_ReconThread(req) - struct rf_recon_req *req; -{ - RF_Raid_t *raidPtr; - - mtx_lock(&Giant); - raidPtr = (RF_Raid_t *) req->raidPtr; - raidPtr->recon_in_progress = 1; - - rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, - ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); - - /* XXX get rid of this! we don't need it at all.. */ - RF_Free(req, sizeof(*req)); - - raidPtr->recon_in_progress = 0; - - /* That's all... */ - RF_THREAD_EXIT(0); /* does not return */ -} - -void -rf_RewriteParityThread(raidPtr) - RF_Raid_t *raidPtr; -{ - int retcode; - - mtx_lock(&Giant); - raidPtr->parity_rewrite_in_progress = 1; - retcode = rf_RewriteParity(raidPtr); - if (retcode) { - rf_printf(0, "raid%d: Error re-writing parity!\n",raidPtr->raidid); - } else { - /* set the clean bit! If we shutdown correctly, - the clean bit on each component label will get - set */ - raidPtr->parity_good = RF_RAID_CLEAN; - } - raidPtr->parity_rewrite_in_progress = 0; - - /* Anyone waiting for us to stop? If so, inform them... */ - if (raidPtr->waitShutdown) { - wakeup(&raidPtr->parity_rewrite_in_progress); - } - - /* That's all... 
*/ - RF_THREAD_EXIT(0); /* does not return */ -} - - -void -rf_CopybackThread(raidPtr) - RF_Raid_t *raidPtr; -{ - mtx_lock(&Giant); - raidPtr->copyback_in_progress = 1; - rf_CopybackReconstructedData(raidPtr); - raidPtr->copyback_in_progress = 0; - - /* That's all... */ - RF_THREAD_EXIT(0); /* does not return */ -} - - -void -rf_ReconstructInPlaceThread(req) - struct rf_recon_req *req; -{ - int retcode; - RF_Raid_t *raidPtr; - - mtx_lock(&Giant); - raidPtr = req->raidPtr; - raidPtr->recon_in_progress = 1; - retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col); - RF_Free(req, sizeof(*req)); - raidPtr->recon_in_progress = 0; - - /* That's all... */ - RF_THREAD_EXIT(0); /* does not return */ -} - -RF_AutoConfig_t * -rf_find_raid_components() -{ - RF_AutoConfig_t *ac_list = NULL; -#if 0 /* XXX GEOM */ - struct vnode *vp; - struct disklabel *label; - struct diskslice *slice; - struct diskslices *slices; - struct disk *disk; - struct thread *td; - dev_t dev; - char *devname; - int error, j; - int nslices; - - td = curthread; - - MALLOC(label, struct disklabel *, sizeof(struct disklabel), - M_RAIDFRAME, M_NOWAIT|M_ZERO); - MALLOC(slices, struct diskslices *, sizeof(struct diskslices), - M_RAIDFRAME, M_NOWAIT|M_ZERO); - if ((label == NULL) || (slices == NULL)) { - printf("rf_find_raid_components: Out of Memory?\n"); - return (NULL); - } - - /* initialize the AutoConfig list */ - ac_list = NULL; - - /* we begin by trolling through *all* the disk devices on the system */ - - disk = NULL; - while ((disk = disk_enumerate(disk))) { - - /* we don't care about floppies... 
*/ - devname = disk->d_dev->si_name; - if (!strncmp(devname, "fd", 2) || - !strncmp(devname, "cd", 2) || - !strncmp(devname, "acd", 3)) - continue; - - rf_printf(1, "Examining %s\n", disk->d_dev->si_name); - if (bdevvp(disk->d_dev, &vp)) - panic("RAIDframe can't alloc vnode"); - vref(vp); - - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); - VOP_UNLOCK(vp, 0, td); - if (error) { - vput(vp); - continue; - } - - error = VOP_IOCTL(vp, DIOCGSLICEINFO, (caddr_t)slices, - FREAD, td->td_ucred, td); - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - vrele(vp); - if (error) { - /* No slice table. */ - continue; - } - - nslices = slices->dss_nslices; - if ((nslices == 0) || (nslices > MAX_SLICES)) - continue; - - /* Iterate through the slices */ - for (j = 1; j < nslices; j++) { - - rf_printf(1, "Examining slice %d\n", j); - slice = &slices->dss_slices[j - 1]; - dev = dkmodslice(disk->d_dev, j); - if (bdevvp(dev, &vp)) - panic("RAIDframe can't alloc vnode"); - - vref(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); - VOP_UNLOCK(vp, 0, td); - if (error) { - continue; - } - - error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)label, - FREAD, td->td_ucred, td); - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - vrele(vp); - if (error) - continue; - - rf_search_label(dev, label, &ac_list); - } - } - - FREE(label, M_RAIDFRAME); - FREE(slices, M_RAIDFRAME); -#endif - return (ac_list); -} - -static void -rf_search_label(dev_t dev, struct disklabel *label, RF_AutoConfig_t **ac_list) -{ - RF_AutoConfig_t *ac; - RF_ComponentLabel_t *clabel; - struct vnode *vp; - struct thread *td; - dev_t dev1; - int i, error, good_one; - - td = curthread; - - /* Iterate through the partitions */ - for (i=0; i < label->d_npartitions; i++) { - /* We only support partitions marked as RAID */ - if (label->d_partitions[i].p_fstype != FS_RAID) - continue; - -#if 0 /* GEOM */ - dev1 = dkmodpart(dev, i); -#else - 
dev1 = NULL; -#endif - if (dev1 == NULL) { - rf_printf(1, "dev1 == null\n"); - continue; - } - if (bdevvp(dev1, &vp)) - panic("RAIDframe can't alloc vnode"); - - vref(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); - VOP_UNLOCK(vp, 0, td); - if (error) { - /* Whatever... */ - continue; - } - - good_one = 0; - - clabel = (RF_ComponentLabel_t *) - malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, - M_NOWAIT); - if (clabel == NULL) { - /* XXX CLEANUP HERE */ - panic("RAID autoconfig: no memory!\n"); - } - - if (!raidread_component_label(dev1, vp, clabel)) { - /* Got the label. Is it reasonable? */ - if (rf_reasonable_label(clabel) && - (clabel->partitionSize <= - label->d_partitions[i].p_size)) { - rf_printf(1, "Component on: %s: %d\n", - dev1->si_name, label->d_partitions[i].p_size); - rf_print_component_label(clabel); - /* if it's reasonable, add it, else ignore it */ - ac = (RF_AutoConfig_t *) - malloc(sizeof(RF_AutoConfig_t), - M_RAIDFRAME, M_NOWAIT); - if (ac == NULL) { - /* XXX should panic? */ - panic("RAID autoconfig: no memory!\n"); - } - - sprintf(ac->devname, "%s", dev->si_name); - ac->dev = dev1; - ac->vp = vp; - ac->clabel = clabel; - ac->next = *ac_list; - *ac_list = ac; - good_one = 1; - } - } - if (!good_one) { - /* cleanup */ - free(clabel, M_RAIDFRAME); - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - vrele(vp); - } - } -} - -static int -rf_reasonable_label(clabel) - RF_ComponentLabel_t *clabel; -{ - - if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || - (clabel->version==RF_COMPONENT_LABEL_VERSION)) && - ((clabel->clean == RF_RAID_CLEAN) || - (clabel->clean == RF_RAID_DIRTY)) && - clabel->row >=0 && - clabel->column >= 0 && - clabel->num_rows > 0 && - clabel->num_columns > 0 && - clabel->row < clabel->num_rows && - clabel->column < clabel->num_columns && - clabel->blockSize > 0 && - clabel->numBlocks > 0) { - /* label looks reasonable enough... 
*/ - return(1); - } - return(0); -} - - -void -rf_print_component_label(clabel) - RF_ComponentLabel_t *clabel; -{ - rf_printf(1, " Row: %d Column: %d Num Rows: %d Num Columns: %d\n", - clabel->row, clabel->column, - clabel->num_rows, clabel->num_columns); - rf_printf(1, " Version: %d Serial Number: %d Mod Counter: %d\n", - clabel->version, clabel->serial_number, - clabel->mod_counter); - rf_printf(1, " Clean: %s Status: %d\n", - clabel->clean ? "Yes" : "No", clabel->status ); - rf_printf(1, " sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", - clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); - rf_printf(1, " RAID Level: %c blocksize: %d numBlocks: %d\n", - (char) clabel->parityConfig, clabel->blockSize, - clabel->numBlocks); - rf_printf(1, " Autoconfig: %s\n", clabel->autoconfigure ? "Yes":"No"); - rf_printf(1, " Contains root partition: %s\n", - clabel->root_partition ? "Yes" : "No" ); - rf_printf(1, " Last configured as: raid%d\n", clabel->last_unit ); -#if 0 - rf_printf(1, " Config order: %d\n", clabel->config_order); -#endif - -} - -RF_ConfigSet_t * -rf_create_auto_sets(ac_list) - RF_AutoConfig_t *ac_list; -{ - RF_AutoConfig_t *ac; - RF_ConfigSet_t *config_sets; - RF_ConfigSet_t *cset; - RF_AutoConfig_t *ac_next; - - - config_sets = NULL; - - /* Go through the AutoConfig list, and figure out which components - belong to what sets. */ - ac = ac_list; - while(ac!=NULL) { - /* we're going to putz with ac->next, so save it here - for use at the end of the loop */ - ac_next = ac->next; - - if (config_sets == NULL) { - /* will need at least this one... */ - config_sets = (RF_ConfigSet_t *) - malloc(sizeof(RF_ConfigSet_t), - M_RAIDFRAME, M_NOWAIT); - if (config_sets == NULL) { - panic("rf_create_auto_sets: No memory!\n"); - } - /* this one is easy :) */ - config_sets->ac = ac; - config_sets->next = NULL; - config_sets->rootable = 0; - ac->next = NULL; - } else { - /* which set does this component fit into? 
*/ - cset = config_sets; - while(cset!=NULL) { - if (rf_does_it_fit(cset, ac)) { - /* looks like it matches... */ - ac->next = cset->ac; - cset->ac = ac; - break; - } - cset = cset->next; - } - if (cset==NULL) { - /* didn't find a match above... new set..*/ - cset = (RF_ConfigSet_t *) - malloc(sizeof(RF_ConfigSet_t), - M_RAIDFRAME, M_NOWAIT); - if (cset == NULL) { - panic("rf_create_auto_sets: No memory!\n"); - } - cset->ac = ac; - ac->next = NULL; - cset->next = config_sets; - cset->rootable = 0; - config_sets = cset; - } - } - ac = ac_next; - } - - - return(config_sets); -} - -static int -rf_does_it_fit(cset, ac) - RF_ConfigSet_t *cset; - RF_AutoConfig_t *ac; -{ - RF_ComponentLabel_t *clabel1, *clabel2; - - /* If this one matches the *first* one in the set, that's good - enough, since the other members of the set would have been - through here too... */ - /* note that we are not checking partitionSize here.. - - Note that we are also not checking the mod_counters here. - If everything else matches execpt the mod_counter, that's - good enough for this test. We will deal with the mod_counters - a little later in the autoconfiguration process. - - (clabel1->mod_counter == clabel2->mod_counter) && - - The reason we don't check for this is that failed disks - will have lower modification counts. If those disks are - not added to the set they used to belong to, then they will - form their own set, which may result in 2 different sets, - for example, competing to be configured at raid0, and - perhaps competing to be the root filesystem set. If the - wrong ones get configured, or both attempt to become /, - weird behaviour and or serious lossage will occur. Thus we - need to bring them into the fold here, and kick them out at - a later point. 
- - */ - - clabel1 = cset->ac->clabel; - clabel2 = ac->clabel; - if ((clabel1->version == clabel2->version) && - (clabel1->serial_number == clabel2->serial_number) && - (clabel1->num_rows == clabel2->num_rows) && - (clabel1->num_columns == clabel2->num_columns) && - (clabel1->sectPerSU == clabel2->sectPerSU) && - (clabel1->SUsPerPU == clabel2->SUsPerPU) && - (clabel1->SUsPerRU == clabel2->SUsPerRU) && - (clabel1->parityConfig == clabel2->parityConfig) && - (clabel1->maxOutstanding == clabel2->maxOutstanding) && - (clabel1->blockSize == clabel2->blockSize) && - (clabel1->numBlocks == clabel2->numBlocks) && - (clabel1->autoconfigure == clabel2->autoconfigure) && - (clabel1->root_partition == clabel2->root_partition) && - (clabel1->last_unit == clabel2->last_unit) && - (clabel1->config_order == clabel2->config_order)) { - /* if it get's here, it almost *has* to be a match */ - } else { - /* it's not consistent with somebody in the set.. - punt */ - return(0); - } - /* all was fine.. it must fit... */ - return(1); -} - -int -rf_have_enough_components(cset) - RF_ConfigSet_t *cset; -{ - RF_AutoConfig_t *ac; - RF_AutoConfig_t *auto_config; - RF_ComponentLabel_t *clabel; - int r,c; - int num_rows; - int num_cols; - int num_missing; - int mod_counter; - int mod_counter_found; - int even_pair_failed; - char parity_type; - - - /* check to see that we have enough 'live' components - of this set. If so, we can configure it if necessary */ - - num_rows = cset->ac->clabel->num_rows; - num_cols = cset->ac->clabel->num_columns; - parity_type = cset->ac->clabel->parityConfig; - - /* XXX Check for duplicate components!?!?!? */ - - /* Determine what the mod_counter is supposed to be for this set. 
*/ - - mod_counter_found = 0; - mod_counter = 0; - ac = cset->ac; - while(ac!=NULL) { - if (mod_counter_found==0) { - mod_counter = ac->clabel->mod_counter; - mod_counter_found = 1; - } else { - if (ac->clabel->mod_counter > mod_counter) { - mod_counter = ac->clabel->mod_counter; - } - } - ac = ac->next; - } - - num_missing = 0; - auto_config = cset->ac; - - for(r=0; r<num_rows; r++) { - even_pair_failed = 0; - for(c=0; c<num_cols; c++) { - ac = auto_config; - while(ac!=NULL) { - if ((ac->clabel->row == r) && - (ac->clabel->column == c) && - (ac->clabel->mod_counter == mod_counter)) { - /* it's this one... */ - rf_printf(1, "Found: %s at %d,%d\n", - ac->devname,r,c); - break; - } - ac=ac->next; - } - if (ac==NULL) { - /* Didn't find one here! */ - /* special case for RAID 1, especially - where there are more than 2 - components (where RAIDframe treats - things a little differently :( ) */ - if (parity_type == '1') { - if (c%2 == 0) { /* even component */ - even_pair_failed = 1; - } else { /* odd component. If - we're failed, and - so is the even - component, it's - "Good Night, Charlie" */ - if (even_pair_failed == 1) { - return(0); - } - } - } else { - /* normal accounting */ - num_missing++; - } - } - if ((parity_type == '1') && (c%2 == 1)) { - /* Just did an even component, and we didn't - bail.. reset the even_pair_failed flag, - and go on to the next component.... 
*/ - even_pair_failed = 0; - } - } - } - - clabel = cset->ac->clabel; - - if (((clabel->parityConfig == '0') && (num_missing > 0)) || - ((clabel->parityConfig == '4') && (num_missing > 1)) || - ((clabel->parityConfig == '5') && (num_missing > 1))) { - /* XXX this needs to be made *much* more general */ - /* Too many failures */ - return(0); - } - /* otherwise, all is well, and we've got enough to take a kick - at autoconfiguring this set */ - return(1); -} - -void -rf_create_configuration(ac,config,raidPtr) - RF_AutoConfig_t *ac; - RF_Config_t *config; - RF_Raid_t *raidPtr; -{ - RF_ComponentLabel_t *clabel; - int i; - - clabel = ac->clabel; - - /* 1. Fill in the common stuff */ - config->numRow = clabel->num_rows; - config->numCol = clabel->num_columns; - config->numSpare = 0; /* XXX should this be set here? */ - config->sectPerSU = clabel->sectPerSU; - config->SUsPerPU = clabel->SUsPerPU; - config->SUsPerRU = clabel->SUsPerRU; - config->parityConfig = clabel->parityConfig; - /* XXX... */ - strcpy(config->diskQueueType,"fifo"); - config->maxOutstandingDiskReqs = clabel->maxOutstanding; - config->layoutSpecificSize = 0; /* XXX ? 
*/ - - while(ac!=NULL) { - /* row/col values will be in range due to the checks - in reasonable_label() */ - strcpy(config->devnames[ac->clabel->row][ac->clabel->column], - ac->devname); - ac = ac->next; - } - - for(i=0;i<RF_MAXDBGV;i++) { - config->debugVars[i][0] = '\0'; - } -} - -int -rf_set_autoconfig(raidPtr, new_value) - RF_Raid_t *raidPtr; - int new_value; -{ - RF_ComponentLabel_t *clabel; - struct vnode *vp; - dev_t dev; - int row, column; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_WAITOK | M_ZERO); - - raidPtr->autoconfigure = new_value; - for(row=0; row<raidPtr->numRow; row++) { - for(column=0; column<raidPtr->numCol; column++) { - if (raidPtr->Disks[row][column].status == - rf_ds_optimal) { - dev = raidPtr->Disks[row][column].dev; - vp = raidPtr->raid_cinfo[row][column].ci_vp; - raidread_component_label(dev, vp, clabel); - clabel->autoconfigure = new_value; - raidwrite_component_label(dev, vp, clabel); - } - } - } - FREE(clabel, M_RAIDFRAME); - return(new_value); -} - -int -rf_set_rootpartition(raidPtr, new_value) - RF_Raid_t *raidPtr; - int new_value; -{ - RF_ComponentLabel_t *clabel; - struct vnode *vp; - dev_t dev; - int row, column; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_WAITOK | M_ZERO); - - raidPtr->root_partition = new_value; - for(row=0; row<raidPtr->numRow; row++) { - for(column=0; column<raidPtr->numCol; column++) { - if (raidPtr->Disks[row][column].status == - rf_ds_optimal) { - dev = raidPtr->Disks[row][column].dev; - vp = raidPtr->raid_cinfo[row][column].ci_vp; - raidread_component_label(dev, vp, clabel); - clabel->root_partition = new_value; - raidwrite_component_label(dev, vp, clabel); - } - } - } - FREE(clabel, M_RAIDFRAME); - return(new_value); -} - -void -rf_release_all_vps(cset) - RF_ConfigSet_t *cset; -{ - RF_AutoConfig_t *ac; - struct thread *td; - - td = curthread; - ac = cset->ac; - while(ac!=NULL) { - /* Close the vp, and give it back */ 
- if (ac->vp) { - VOP_CLOSE(ac->vp, FREAD, td->td_ucred, td); - vrele(ac->vp); - ac->vp = NULL; - } - ac = ac->next; - } -} - - -void -rf_cleanup_config_set(cset) - RF_ConfigSet_t *cset; -{ - RF_AutoConfig_t *ac; - RF_AutoConfig_t *next_ac; - - ac = cset->ac; - while(ac!=NULL) { - next_ac = ac->next; - /* nuke the label */ - free(ac->clabel, M_RAIDFRAME); - /* cleanup the config structure */ - free(ac, M_RAIDFRAME); - /* "next.." */ - ac = next_ac; - } - /* and, finally, nuke the config set */ - free(cset, M_RAIDFRAME); -} - - -void -raid_init_component_label(raidPtr, clabel) - RF_Raid_t *raidPtr; - RF_ComponentLabel_t *clabel; -{ - /* current version number */ - clabel->version = RF_COMPONENT_LABEL_VERSION; - clabel->serial_number = raidPtr->serial_number; - clabel->mod_counter = raidPtr->mod_counter; - clabel->num_rows = raidPtr->numRow; - clabel->num_columns = raidPtr->numCol; - clabel->clean = RF_RAID_DIRTY; /* not clean */ - clabel->status = rf_ds_optimal; /* "It's good!" */ - - clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; - clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; - - clabel->blockSize = raidPtr->bytesPerSector; - clabel->numBlocks = raidPtr->sectorsPerDisk; - - /* XXX not portable */ - clabel->parityConfig = raidPtr->Layout.map->parityConfig; - clabel->maxOutstanding = raidPtr->maxOutstanding; - clabel->autoconfigure = raidPtr->autoconfigure; - clabel->root_partition = raidPtr->root_partition; - clabel->last_unit = raidPtr->raidid; - clabel->config_order = raidPtr->config_order; -} - -int -rf_auto_config_set(cset, unit, parent_sc) - RF_ConfigSet_t *cset; - int *unit; - struct raidctl_softc *parent_sc; -{ - int retcode = 0; - RF_Raid_t *raidPtr; - RF_Config_t *config; - int raidID; - - rf_printf(0, "RAIDframe autoconfigure\n"); - - *unit = -1; - - /* 1. 
Create a config structure */ - - config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME, - M_NOWAIT|M_ZERO); - if (config==NULL) { - rf_printf(0, "Out of mem at rf_auto_config_set\n"); - /* XXX do something more intelligent here. */ - return(1); - } - - /* XXX raidID needs to be set correctly.. */ - - /* - 2. Figure out what RAID ID this one is supposed to live at - See if we can get the same RAID dev that it was configured - on last time.. - */ - - raidID = cset->ac->clabel->last_unit; - if (raidID < 0) { - /* let's not wander off into lala land. */ - raidID = raidgetunit(parent_sc, 0); - } else { - raidID = raidgetunit(parent_sc, raidID); - } - - if (raidID < 0) { - /* punt... */ - rf_printf(0, "Unable to auto configure this set!\n"); - rf_printf(1, "Out of RAID devs!\n"); - return(1); - } - rf_printf(0, "Configuring raid%d:\n",raidID); - RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *)); - if (raidPtr == NULL) { - rf_printf(0, "Out of mem at rf_auto_config_set\n"); - return (1); - } - bzero((char *)raidPtr, sizeof(RF_Raid_t)); - - /* XXX all this stuff should be done SOMEWHERE ELSE! */ - raidPtr->raidid = raidID; - raidPtr->openings = RAIDOUTSTANDING; - - /* 3. Build the configuration structure */ - rf_create_configuration(cset->ac, config, raidPtr); - - /* 4. Do the configuration */ - retcode = rf_Configure(raidPtr, config, cset->ac); - - if (retcode == 0) { - - parent_sc->sc_raiddevs[raidID] = raidinit(raidPtr); - if (parent_sc->sc_raiddevs[raidID] == NULL) { - rf_printf(0, "Could not create RAID device\n"); - RF_Free(raidPtr, sizeof(RF_Raid_t)); - free(config, M_RAIDFRAME); - return (1); - } - - parent_sc->sc_numraid++; - ((struct raid_softc *)raidPtr->sc)->sc_parent_dev = - parent_sc->sc_dev; - rf_markalldirty(raidPtr); - raidPtr->autoconfigure = 1; /* XXX do this here? */ - if (cset->ac->clabel->root_partition==1) { - /* everything configured just fine. Make a note - that this set is eligible to be root. 
*/ - cset->rootable = 1; - /* XXX do this here? */ - raidPtr->root_partition = 1; - } - } - - /* 5. Cleanup */ - free(config, M_RAIDFRAME); - - *unit = raidID; - return(retcode); -} - -void -rf_disk_unbusy(desc) - RF_RaidAccessDesc_t *desc; -{ - struct raid_softc *sc; - struct bio *bp; - - sc = desc->raidPtr->sc; - bp = (struct bio *)desc->bp; -} - -/* - * Get the next available unit number from the bitmap. You can also request - * a particular unit number by passing it in the second arg. If it's not - * available, then grab the next free one. Return -1 if none are available. - */ -static int -raidgetunit(struct raidctl_softc *parent_sc, int id) -{ - int i; - - if (id >= RF_MAX_ARRAYS) - return (-1); - - for (i = id; i < RF_MAX_ARRAYS; i++) { - if (parent_sc->sc_raiddevs[i] == NULL) - return (i); - } - - if (id != 0) { - for (i = 0; i < id; i++) { - if (parent_sc->sc_raiddevs[i] == NULL) - return (i); - } - } - - return (-1); -} - -static int -raidshutdown(void) -{ - struct raidctl_softc *parent_sc; - int i, error = 0; - - parent_sc = raidctl_dev->si_drv1; - - if (parent_sc->sc_numraid != 0) { -#if XXX_KTHREAD_EXIT_RACE - return (EBUSY); -#else - for (i = 0; i < RF_MAX_ARRAYS; i++) { - if (parent_sc->sc_raiddevs[i] != NULL) { - rf_printf(0, "Shutting down raid%d\n", i); - error = raidctlioctl(raidctl_dev, - RAIDFRAME_SHUTDOWN, (caddr_t)&i, 0, NULL); - if (error) - return (error); - if (parent_sc->sc_numraid == 0) - break; - } - } -#endif - } - - destroy_dev(raidctl_dev); - - return (error); -} - -int -raid_getcomponentsize(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col) -{ - struct vnode *vp; - struct vattr va; - RF_Thread_t td; - off_t mediasize; - u_int secsize; - int retcode; - - td = raidPtr->engine_thread; - - retcode = raidlookup(raidPtr->Disks[row][col].devname, td, &vp); - - if (retcode) { - printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n",raidPtr->raidid, - raidPtr->Disks[row][col].devname, retcode); - - /* XXX the component isn't 
responding properly... - must be still dead :-( */ - raidPtr->reconInProgress--; - return(retcode); - - } else { - - /* Ok, so we can at least do a lookup... - How about actually getting a vp for it? */ - - if ((retcode = VOP_GETATTR(vp, &va, rf_getucred(td), - td)) != 0) { - raidPtr->reconInProgress--; - return(retcode); - } - - retcode = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)&secsize, - FREAD, rf_getucred(td), td); - if (retcode) - return (retcode); - raidPtr->Disks[row][col].blockSize = secsize; - - retcode = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, - FREAD, rf_getucred(td), td); - if (retcode) - return (retcode); - raidPtr->Disks[row][col].numBlocks = mediasize / secsize; - - raidPtr->raid_cinfo[row][col].ci_vp = vp; - raidPtr->raid_cinfo[row][col].ci_dev = udev2dev(va.va_rdev); - raidPtr->Disks[row][col].dev = udev2dev(va.va_rdev); - - /* we allow the user to specify that only a - fraction of the disks should be used this is - just for debug: it speeds up - * the parity scan */ - raidPtr->Disks[row][col].numBlocks = - raidPtr->Disks[row][col].numBlocks * - rf_sizePercentage / 100; - } - - return(retcode); -} - -static int -raid_modevent(mod, type, data) - module_t mod; - int type; - void *data; -{ - int error = 0; - - switch (type) { - case MOD_LOAD: - raidattach(); - break; - - case MOD_UNLOAD: - case MOD_SHUTDOWN: - error = raidshutdown(); - break; - - default: - break; - } - - return (error); -} - -moduledata_t raid_mod = { - "raidframe", - (modeventhand_t) raid_modevent, - 0}; - -DECLARE_MODULE(raidframe, raid_mod, SI_SUB_RAID, SI_ORDER_MIDDLE); diff --git a/sys/dev/raidframe/rf_freelist.h b/sys/dev/raidframe/rf_freelist.h deleted file mode 100644 index 13a5e83..0000000 --- a/sys/dev/raidframe/rf_freelist.h +++ /dev/null @@ -1,702 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_freelist.h,v 1.6 2002/08/08 02:53:01 oster Exp $ */ -/* - * rf_freelist.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_freelist.h -- code to manage counted freelists - * - * Keep an arena of fixed-size objects. When a new object is needed, - * allocate it as necessary. When an object is freed, either put it - * in the arena, or really free it, depending on the maximum arena - * size. 
- */ - -#ifndef _RF__RF_FREELIST_H_ -#define _RF__RF_FREELIST_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_threadstuff.h> - -#define RF_FREELIST_STATS 0 - -#if RF_FREELIST_STATS > 0 -typedef struct RF_FreeListStats_s { - char *file; - int line; - int allocations; - int frees; - int max_free; - int grows; - int outstanding; - int max_outstanding; -} RF_FreeListStats_t; -#define RF_FREELIST_STAT_INIT(_fl_) { \ - bzero((char *)&((_fl_)->stats), sizeof(RF_FreeListStats_t)); \ - (_fl_)->stats.file = __FILE__; \ - (_fl_)->stats.line = __LINE__; \ -} - -#define RF_FREELIST_STAT_ALLOC(_fl_) { \ - (_fl_)->stats.allocations++; \ - (_fl_)->stats.outstanding++; \ - if ((_fl_)->stats.outstanding > (_fl_)->stats.max_outstanding) \ - (_fl_)->stats.max_outstanding = (_fl_)->stats.outstanding; \ -} - -#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) { \ - if ((_fl_)->free_cnt > (_fl_)->stats.max_free) \ - (_fl_)->stats.max_free = (_fl_)->free_cnt; \ -} - -#define RF_FREELIST_STAT_FREE(_fl_) { \ - (_fl_)->stats.frees++; \ - (_fl_)->stats.outstanding--; \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ -} - -#define RF_FREELIST_STAT_GROW(_fl_) { \ - (_fl_)->stats.grows++; \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ -} - -#define RF_FREELIST_STAT_REPORT(_fl_) { \ - printf("Freelist at %s %d (%s)\n", (_fl_)->stats.file, (_fl_)->stats.line, RF_STRING(_fl_)); \ - printf(" %d allocations, %d frees\n", (_fl_)->stats.allocations, (_fl_)->stats.frees); \ - printf(" %d grows\n", (_fl_)->stats.grows); \ - printf(" %d outstanding\n", (_fl_)->stats.outstanding); \ - printf(" %d free (max)\n", (_fl_)->stats.max_free); \ - printf(" %d outstanding (max)\n", (_fl_)->stats.max_outstanding); \ -} - -#else /* RF_FREELIST_STATS > 0 */ - -#define RF_FREELIST_STAT_INIT(_fl_) -#define RF_FREELIST_STAT_ALLOC(_fl_) -#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) -#define RF_FREELIST_STAT_FREE(_fl_) -#define 
RF_FREELIST_STAT_GROW(_fl_) -#define RF_FREELIST_STAT_REPORT(_fl_) - -#endif /* RF_FREELIST_STATS > 0 */ - -struct RF_FreeList_s { - void *objlist; /* list of free obj */ - int free_cnt; /* how many free obj */ - int max_free_cnt; /* max free arena size */ - int obj_inc; /* how many to allocate at a time */ - int obj_size; /* size of objects */ - RF_DECLARE_MUTEX(lock) -#if RF_FREELIST_STATS > 0 - RF_FreeListStats_t stats; /* statistics */ -#endif /* RF_FREELIST_STATS > 0 */ -}; -/* - * fl = freelist - * maxcnt = max number of items in arena - * inc = how many to allocate at a time - * size = size of object - */ -#define RF_FREELIST_CREATE(_fl_,_maxcnt_,_inc_,_size_) { \ - int rc; \ - RF_ASSERT((_inc_) > 0); \ - RF_Malloc(_fl_, sizeof(RF_FreeList_t), (RF_FreeList_t *)); \ - (_fl_)->objlist = NULL; \ - (_fl_)->free_cnt = 0; \ - (_fl_)->max_free_cnt = _maxcnt_; \ - (_fl_)->obj_inc = _inc_; \ - (_fl_)->obj_size = _size_; \ - rc = rf_mutex_init(&(_fl_)->lock, "RF_FREELIST"); \ - if (rc) { \ - RF_Free(_fl_, sizeof(RF_FreeList_t)); \ - _fl_ = NULL; \ - } \ - RF_FREELIST_STAT_INIT(_fl_); \ -} - -/* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast - */ -#define RF_FREELIST_PRIME(_fl_,_cnt_,_nextp_,_cast_) { \ - void *_p; \ - int _i; \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -#define RF_FREELIST_MUTEX_OF(_fl_) ((_fl_)->lock) - -#define RF_FREELIST_DO_UNLOCK(_fl_) { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -#define RF_FREELIST_DO_LOCK(_fl_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * cnt = number to prime with - * nextp = 
name of "next" pointer in obj - * cast = object cast - * init = func to call to init obj - */ -#define RF_FREELIST_PRIME_INIT(_fl_,_cnt_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_init_ (_cast_ _p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - } \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast - * init = func to call to init obj - * arg = arg to init obj func - */ -#define RF_FREELIST_PRIME_INIT_ARG(_fl_,_cnt_,_nextp_,_cast_,_init_,_arg_) { \ - void *_p; \ - int _i; \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_init_ (_cast_ _p,_arg_)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - } \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func - */ -#define RF_FREELIST_GET_INIT(_fl_,_obj_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - 
else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func - * arg = arg to init obj func - */ -#define RF_FREELIST_GET_INIT_ARG(_fl_,_obj_,_nextp_,_cast_,_init_,_arg_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. 
\ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_,_arg_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p,_arg_)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func - */ -#define RF_FREELIST_GET_INIT_NOUNLOCK(_fl_,_obj_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - } \ - else { \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. 
\ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_FREELIST_STAT_GROW(_fl_); \ - } \ - RF_FREELIST_STAT_ALLOC(_fl_); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - */ -#define RF_FREELIST_GET(_fl_,_obj_,_nextp_,_cast_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. 
\ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * num = num objs to return - */ -#define RF_FREELIST_GET_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \ - void *_p, *_l, *_f; \ - int _i, _n; \ - _l = _f = NULL; \ - _n = 0; \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - for(_n=0;_n<_num_;_n++) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. 
\ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if (_f == NULL) \ - _f = _obj_; \ - if (_obj_) { \ - (_cast_(_obj_))->_nextp_ = _l; \ - _l = _obj_; \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - } \ - else { \ - (_cast_(_f))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _l; \ - _n = _num_; \ - } \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - */ -#define RF_FREELIST_FREE(_fl_,_obj_,_nextp_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * num = num to free (debugging) - */ -#define RF_FREELIST_FREE_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \ - void *_no; \ - int _n; \ - _n = 0; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - while(_obj_) { \ - _no = (_cast_(_obj_))->_nextp_; \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - _n++; \ - _obj_ = _no; \ - RF_FREELIST_STAT_FREE(_fl_); \ - } \ - RF_ASSERT(_n==(_num_)); \ 
- RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init - */ -#define RF_FREELIST_FREE_CLEAN(_fl_,_obj_,_nextp_,_clean_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init - * arg = arg for undo func - */ -#define RF_FREELIST_FREE_CLEAN_ARG(_fl_,_obj_,_nextp_,_clean_,_arg_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_,_arg_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init - */ -#define RF_FREELIST_FREE_CLEAN_NOUNLOCK(_fl_,_obj_,_nextp_,_clean_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ -} - -/* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - */ -#define RF_FREELIST_DESTROY(_fl_,_nextp_,_cast_) { \ - void *_cur, *_next; \ - 
RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} - -/* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - * clean = func to undo obj init - */ -#define RF_FREELIST_DESTROY_CLEAN(_fl_,_nextp_,_cast_,_clean_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - _clean_ (_cur); \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} - -/* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - * clean = func to undo obj init - * arg = arg for undo func - */ -#define RF_FREELIST_DESTROY_CLEAN_ARG(_fl_,_nextp_,_cast_,_clean_,_arg_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - _clean_ (_cur,_arg_); \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} - -#endif /* !_RF__RF_FREELIST_H_ */ diff --git a/sys/dev/raidframe/rf_general.h b/sys/dev/raidframe/rf_general.h deleted file mode 100644 index e709899..0000000 --- a/sys/dev/raidframe/rf_general.h +++ /dev/null @@ -1,107 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_general.h,v 1.6 2000/12/15 02:12:58 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_general.h -- some general-use definitions - */ - -/*#define NOASSERT*/ - -#ifndef _RF__RF_GENERAL_H_ -#define _RF__RF_GENERAL_H_ - -/* error reporting and handling */ - -#ifdef _KERNEL -#include<sys/systm.h> /* printf, sprintf, and friends */ -#endif - -#define RF_ERRORMSG(s) printf((s)) -#define RF_ERRORMSG1(s,a) printf((s),(a)) -#define RF_ERRORMSG2(s,a,b) printf((s),(a),(b)) -#define RF_ERRORMSG3(s,a,b,c) printf((s),(a),(b),(c)) - -void rf_print_panic_message(int, char *); -void rf_print_assert_panic_message(int, char *, char *); - -extern char rf_panicbuf[]; -#define RF_PANIC() {rf_print_panic_message(__LINE__,__FILE__); panic(rf_panicbuf);} - -#ifdef _KERNEL -#ifdef RF_ASSERT -#undef RF_ASSERT -#endif /* RF_ASSERT */ -#ifndef NOASSERT -#define RF_ASSERT(_x_) { \ - if (!(_x_)) { \ - rf_print_assert_panic_message(__LINE__, __FILE__, #_x_); \ - panic(rf_panicbuf); \ - } \ -} -#else /* !NOASSERT */ -#define RF_ASSERT(x) {/*noop*/} -#endif /* !NOASSERT */ -#else /* _KERNEL */ -#define RF_ASSERT(x) {/*noop*/} 
-#endif /* _KERNEL */ - -/* random stuff */ -#define RF_MAX(a,b) (((a) > (b)) ? (a) : (b)) -#define RF_MIN(a,b) (((a) < (b)) ? (a) : (b)) - -/* divide-by-zero check */ -#define RF_DB0_CHECK(a,b) ( ((b)==0) ? 0 : (a)/(b) ) - -/* get time of day */ -#define RF_GETTIME(_t) microtime(&(_t)) - -/* - * zero memory- not all bzero calls go through here, only - * those which in the kernel may have a user address - */ - -#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely - * incorrect. GO */ - -#if defined(__FreeBSD__) -#define NBPG PAGE_SIZE -#endif - -#define RF_UL(x) ((unsigned long) (x)) -#define RF_PGMASK RF_UL(NBPG-1) -#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* bytes left in page */ -#define RF_PAGE_ALIGNED(x) ((RF_UL(x) & RF_PGMASK) == 0) - -#ifdef __STDC__ -#define RF_STRING(_str_) #_str_ -#else /* __STDC__ */ -#define RF_STRING(_str_) "_str_" -#endif /* __STDC__ */ - -#endif /* !_RF__RF_GENERAL_H_ */ diff --git a/sys/dev/raidframe/rf_geniq.c b/sys/dev/raidframe/rf_geniq.c deleted file mode 100644 index c21cb1b..0000000 --- a/sys/dev/raidframe/rf_geniq.c +++ /dev/null @@ -1,165 +0,0 @@ -/* $NetBSD: rf_geniq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_geniq.c - * code which implements Reed-Solomon encoding for RAID level 6 - */ - - -#define RF_UTILITY 1 -#include <dev/raidframe/rf_pqdeg.h> - -/* - five bit lfsr - poly - feedback connections - - val = value; -*/ -int -lsfr_shift(val, poly) - unsigned val, poly; -{ - unsigned new; - unsigned int i; - unsigned high = (val >> 4) & 1; - unsigned bit; - - new = (poly & 1) ? high : 0; - - for (i = 1; i <= 4; i++) { - bit = (val >> (i - 1)) & 1; - if (poly & (1 << i)) /* there is a feedback connection */ - new = new | ((bit ^ high) << i); - else - new = new | (bit << i); - } - return new; -} -/* generate Q matricies for the data */ - -RF_ua32_t rf_qfor[32]; - -void -main() -{ - unsigned int i, j, l, a, b; - unsigned int val; - unsigned int r; - unsigned int m, p, q; - - RF_ua32_t k; - - printf("/*\n"); - printf(" * rf_invertq.h\n"); - printf(" */\n"); - printf("/*\n"); - printf(" * GENERATED FILE -- DO NOT EDIT\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef _RF__RF_INVERTQ_H_\n"); - printf("#define _RF__RF_INVERTQ_H_\n"); - printf("\n"); - printf("/*\n"); - printf(" * rf_geniq.c must include rf_archs.h before including\n"); - printf(" * this file (to get VPATH magic right with the way we\n"); - printf(" * generate this file in kernel trees)\n"); - printf(" */\n"); - printf("/* #include \"rf_archs.h\" */\n"); - printf("\n"); - printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n"); - printf("\n"); - printf("#define RF_Q_COLS 32\n"); - printf("RF_ua32_t rf_rn = {\n"); - k[0] = 1; - for (j = 0; j < 31; j++) - k[j + 1] = lsfr_shift(k[j], 5); - for (j = 0; j < 32; j++) - printf("%d, ", k[j]); 
- printf("};\n"); - - printf("RF_ua32_t rf_qfor[32] = {\n"); - for (i = 0; i < 32; i++) { - printf("/* i = %d */ { 0, ", i); - rf_qfor[i][0] = 0; - for (j = 1; j < 32; j++) { - val = j; - for (l = 0; l < i; l++) - val = lsfr_shift(val, 5); - rf_qfor[i][j] = val; - printf("%d, ", val); - } - printf("},\n"); - } - printf("};\n"); - printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n"); - - /* generate the inverse tables. (i,j,p,q) */ - /* The table just stores a. Get b back from the parity */ - printf("#ifdef KERNEL\n"); - printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n"); - printf("#elif defined(NO_PQ)\n"); - printf("RF_ua1024_t rf_qinv[29*29];\n"); - printf("#else /* !KERNEL && NO_PQ */\n"); - printf("RF_ua1024_t rf_qinv[29*29] = {\n"); - for (i = 0; i < 29; i++) { - for (j = 0; j < 29; j++) { - printf("/* i %d, j %d */{ ", i, j); - if (i == j) - for (l = 0; l < 1023; l++) - printf("0, "); - else { - for (p = 0; p < 32; p++) - for (q = 0; q < 32; q++) { - /* What are a, b such that a ^ - * b = p; and qfor[(28-i)][a - * ^ rf_rn[i+1]] ^ - * qfor[(28-j)][b ^ - * rf_rn[j+1]] = q. Solve by - * guessing a. Then testing. 
*/ - for (a = 0; a < 32; a++) { - b = a ^ p; - if ((rf_qfor[28 - i][a ^ k[i + 1]] ^ rf_qfor[28 - j][b ^ k[j + 1]]) == q) - break; - } - if (a == 32) - printf("unable to solve %d %d %d %d\n", i, j, p, q); - printf("%d,", a); - } - } - printf("},\n"); - } - } - printf("};\n"); - printf("\n#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n\n"); - printf("#endif /* !KERNEL && NO_PQ */\n"); - printf("#endif /* !_RF__RF_INVERTQ_H_ */\n"); - exit(0); -} diff --git a/sys/dev/raidframe/rf_hist.h b/sys/dev/raidframe/rf_hist.h deleted file mode 100644 index b8b12c3..0000000 --- a/sys/dev/raidframe/rf_hist.h +++ /dev/null @@ -1,57 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_hist.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * rf_hist.h - * - * Histgram operations for RAIDframe stats - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#ifndef _RF__RF_HIST_H_ -#define _RF__RF_HIST_H_ - -#include <dev/raidframe/rf_types.h> - -#define RF_HIST_RESOLUTION 5 -#define RF_HIST_MIN_VAL 0 -#define RF_HIST_MAX_VAL 1000 -#define RF_HIST_RANGE (RF_HIST_MAX_VAL - RF_HIST_MIN_VAL) -#define RF_HIST_NUM_BUCKETS (RF_HIST_RANGE / RF_HIST_RESOLUTION + 1) - -typedef RF_uint32 RF_Hist_t; - -#define RF_HIST_ADD(_hist_,_val_) { \ - RF_Hist_t val; \ - val = ((RF_Hist_t)(_val_)) / 1000; \ - if (val >= RF_HIST_MAX_VAL) \ - _hist_[RF_HIST_NUM_BUCKETS-1]++; \ - else \ - _hist_[(val - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \ -} - -#endif /* !_RF__RF_HIST_H_ */ diff --git a/sys/dev/raidframe/rf_interdecluster.c b/sys/dev/raidframe/rf_interdecluster.c deleted file mode 100644 index 8b1dbdb..0000000 --- a/sys/dev/raidframe/rf_interdecluster.c +++ /dev/null @@ -1,285 +0,0 @@ -/* $NetBSD: rf_interdecluster.c,v 1.5 2001/01/26 05:09:13 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/************************************************************ - * - * rf_interdecluster.c -- implements interleaved declustering - * - ************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_INTERDECLUSTER > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_interdecluster.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegwr.h> - -typedef struct RF_InterdeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_InterdeclusterConfigInfo_t; - -int -rf_ConfigureInterDecluster( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t num_used_stripeUnitsPerDisk; - RF_InterdeclusterConfigInfo_t *info; - RF_RowCol_t i, tmp, SUs_per_region; - - /* create an Interleaved Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *), - raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* fill in the config structure. 
*/ - SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1); - info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < SUs_per_region; i++) { - info->stripeIdentifier[i][0] = i / (raidPtr->numCol - 1); - tmp = i / raidPtr->numCol; - info->stripeIdentifier[i][1] = (i + 1 + tmp) % raidPtr->numCol; - } - - /* no spare tables */ - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - - /* total number of stripes should a multiple of 2*numCol: Each sparing - * region consists of 2*numCol stripes: n-1 primary copy, n-1 - * secondary copy and 2 for spare .. */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2 * raidPtr->numCol)); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol); - /* this is in fact the number of stripe units (that are primary data - * copies) in the sparing region */ - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol + 1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - - layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; - - raidPtr->sectorsPerDisk = - num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = - (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr) -{ - return (30); -} - -RF_HeadSepLimit_t 
-rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr) -{ - return (raidPtr->sectorsPerDisk); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsInterDecluster( - RF_Raid_t * raidPtr) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - return (2 * ((RF_ReconUnitCount_t) info->numSparingRegions)); - /* the layout uses two stripe units per disk as spare within each - * sparing region */ -} -/* Maps to the primary copy of the data, i.e. the first mirror pair */ -void -rf_MapSectorInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk; - RF_StripeNum_t sparing_region_id, index_within_region; - int col_before_remap; - - *row = 0; - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - su_offset_into_disk = index_within_region % (raidPtr->numCol - 1); - mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; - col_before_remap = index_within_region / (raidPtr->numCol - 1); - - if (!remap) { - *col = col_before_remap;; - *diskSector = (su_offset_into_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap sector to spare space... 
*/ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - *col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; - if (*col == col_before_remap) - *col = (*col + 1) % raidPtr->numCol; - } -} -/* Maps to the second copy of the mirror pair. */ -void -rf_MapParityInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - int col_before_remap; - - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; - col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; - - *row = 0; - if (!remap) { - *col = col_before_remap; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += sparing_region_id * (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap parity to spare space ... 
*/ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - *col = index_within_region / (raidPtr->numCol - 1); - *col = (*col + 1) % raidPtr->numCol; - if (*col == col_before_remap) - *col = (*col + 1) % raidPtr->numCol; - } -} - -void -rf_IdentifyStripeInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID; - - SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; - SUID = SUID % info->stripeUnitsPerSparingRegion; - - *outRow = 0; - *diskids = info->stripeIdentifier[SUID]; -} - -void -rf_MapSIDToPSIDInterDecluster( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -/****************************************************************************** - * select a graph to perform a single-stripe access - * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - name of function to use to create the graph - *****************************************************************************/ - -void -rf_RAIDIDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - *createFunc = (type == RF_IO_TYPE_READ) ? 
(RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; -} -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ diff --git a/sys/dev/raidframe/rf_interdecluster.h b/sys/dev/raidframe/rf_interdecluster.h deleted file mode 100644 index 9bf3825..0000000 --- a/sys/dev/raidframe/rf_interdecluster.h +++ /dev/null @@ -1,60 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_interdecluster.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* rf_interdecluster.h - * header file for Interleaved Declustering - */ - -#ifndef _RF__RF_INTERDECLUSTER_H_ -#define _RF__RF_INTERDECLUSTER_H_ - -int -rf_ConfigureInterDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t * raidPtr); -void -rf_MapSectorInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAIDIDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_INTERDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_invertq.c b/sys/dev/raidframe/rf_invertq.c deleted file mode 100644 index 66337b6..0000000 --- a/sys/dev/raidframe/rf_invertq.c +++ /dev/null @@ -1,34 +0,0 @@ -/* $NetBSD: rf_invertq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_pqdeg.h> -#include <dev/raidframe/rf_invertq.h> diff --git a/sys/dev/raidframe/rf_invertq.h b/sys/dev/raidframe/rf_invertq.h deleted file mode 100644 index fde2cae..0000000 --- a/sys/dev/raidframe/rf_invertq.h +++ /dev/null @@ -1,64 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_invertq.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * rf_invertq.h - */ -/* - * This is normally a generated file. Not so for NetBSD. 
- */ - -#ifndef _RF__RF_INVERTQ_H_ -#define _RF__RF_INVERTQ_H_ - -/* - * rf_geniq.c must include rf_archs.h before including - * this file (to get VPATH magic right with the way we - * generate this file in kernel trees) - */ -/* #include <dev/raidframe/rf_archs.h> */ - -#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -#define RF_Q_COLS 32 -RF_ua32_t rf_rn = { -1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1,}; -RF_ua32_t rf_qfor[32] = { - /* i = 0 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,}, - /* i = 1 */ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 5, 7, 1, 3, 13, 15, 9, 11, 21, 23, 17, 19, 29, 31, 25, 27,}, - /* i = 2 */ {0, 4, 8, 12, 16, 20, 24, 28, 5, 1, 13, 9, 21, 17, 29, 25, 10, 14, 2, 6, 26, 30, 18, 22, 15, 11, 7, 3, 31, 27, 23, 19,}, - /* i = 3 */ {0, 8, 16, 24, 5, 13, 21, 29, 10, 2, 26, 18, 15, 7, 31, 23, 20, 28, 4, 12, 17, 25, 1, 9, 30, 22, 14, 6, 27, 19, 11, 3,}, - /* i = 4 */ {0, 16, 5, 21, 10, 26, 15, 31, 20, 4, 17, 1, 30, 14, 27, 11, 13, 29, 8, 24, 7, 23, 2, 18, 25, 9, 28, 12, 19, 3, 22, 6,}, - /* i = 5 */ {0, 5, 10, 15, 20, 17, 30, 27, 13, 8, 7, 2, 25, 28, 19, 22, 26, 31, 16, 21, 14, 11, 4, 1, 23, 18, 29, 24, 3, 6, 9, 12,}, - /* i = 6 */ {0, 10, 20, 30, 13, 7, 25, 19, 26, 16, 14, 4, 23, 29, 3, 9, 17, 27, 5, 15, 28, 22, 8, 2, 11, 1, 31, 21, 6, 12, 18, 24,}, - /* i = 7 */ {0, 20, 13, 25, 26, 14, 23, 3, 17, 5, 28, 8, 11, 31, 6, 18, 7, 19, 10, 30, 29, 9, 16, 4, 22, 2, 27, 15, 12, 24, 1, 21,}, - /* i = 8 */ {0, 13, 26, 23, 17, 28, 11, 6, 7, 10, 29, 16, 22, 27, 12, 1, 14, 3, 20, 25, 31, 18, 5, 8, 9, 4, 19, 30, 24, 21, 2, 15,}, - /* i = 9 */ {0, 26, 17, 11, 7, 29, 22, 12, 14, 20, 31, 5, 9, 19, 24, 2, 28, 6, 13, 23, 27, 1, 10, 16, 18, 8, 3, 25, 21, 15, 4, 30,}, - /* i = 10 */ {0, 17, 7, 22, 14, 31, 9, 24, 28, 13, 27, 10, 18, 3, 21, 4, 29, 12, 26, 11, 19, 2, 20, 5, 1, 16, 6, 23, 15, 30, 8, 
25,}, - /* i = 11 */ {0, 7, 14, 9, 28, 27, 18, 21, 29, 26, 19, 20, 1, 6, 15, 8, 31, 24, 17, 22, 3, 4, 13, 10, 2, 5, 12, 11, 30, 25, 16, 23,}, - /* i = 12 */ {0, 14, 28, 18, 29, 19, 1, 15, 31, 17, 3, 13, 2, 12, 30, 16, 27, 21, 7, 9, 6, 8, 26, 20, 4, 10, 24, 22, 25, 23, 5, 11,}, - /* i = 13 */ {0, 28, 29, 1, 31, 3, 2, 30, 27, 7, 6, 26, 4, 24, 25, 5, 19, 15, 14, 18, 12, 16, 17, 13, 8, 20, 21, 9, 23, 11, 10, 22,}, - /* i = 14 */ {0, 29, 31, 2, 27, 6, 4, 25, 19, 14, 12, 17, 8, 21, 23, 10, 3, 30, 28, 1, 24, 5, 7, 26, 16, 13, 15, 18, 11, 22, 20, 9,}, - /* i = 15 */ {0, 31, 27, 4, 19, 12, 8, 23, 3, 28, 24, 7, 16, 15, 11, 20, 6, 25, 29, 2, 21, 10, 14, 17, 5, 26, 30, 1, 22, 9, 13, 18,}, - /* i = 16 */ {0, 27, 19, 8, 3, 24, 16, 11, 6, 29, 21, 14, 5, 30, 22, 13, 12, 23, 31, 4, 15, 20, 28, 7, 10, 17, 25, 2, 9, 18, 26, 1,}, - /* i = 17 */ {0, 19, 3, 16, 6, 21, 5, 22, 12, 31, 15, 28, 10, 25, 9, 26, 24, 11, 27, 8, 30, 13, 29, 14, 20, 7, 23, 4, 18, 1, 17, 2,}, - /* i = 18 */ {0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17, 21, 22, 19, 16, 25, 26, 31, 28, 13, 14, 11, 8, 1, 2, 7, 4,}, - /* i = 19 */ {0, 6, 12, 10, 24, 30, 20, 18, 21, 19, 25, 31, 13, 11, 1, 7, 15, 9, 3, 5, 23, 17, 27, 29, 26, 28, 22, 16, 2, 4, 14, 8,}, - /* i = 20 */ {0, 12, 24, 20, 21, 25, 13, 1, 15, 3, 23, 27, 26, 22, 2, 14, 30, 18, 6, 10, 11, 7, 19, 31, 17, 29, 9, 5, 4, 8, 28, 16,}, - /* i = 21 */ {0, 24, 21, 13, 15, 23, 26, 2, 30, 6, 11, 19, 17, 9, 4, 28, 25, 1, 12, 20, 22, 14, 3, 27, 7, 31, 18, 10, 8, 16, 29, 5,}, - /* i = 22 */ {0, 21, 15, 26, 30, 11, 17, 4, 25, 12, 22, 3, 7, 18, 8, 29, 23, 2, 24, 13, 9, 28, 6, 19, 14, 27, 1, 20, 16, 5, 31, 10,}, - /* i = 23 */ {0, 15, 30, 17, 25, 22, 7, 8, 23, 24, 9, 6, 14, 1, 16, 31, 11, 4, 21, 26, 18, 29, 12, 3, 28, 19, 2, 13, 5, 10, 27, 20,}, - /* i = 24 */ {0, 30, 25, 7, 23, 9, 14, 16, 11, 21, 18, 12, 28, 2, 5, 27, 22, 8, 15, 17, 1, 31, 24, 6, 29, 3, 4, 26, 10, 20, 19, 13,}, - /* i = 25 */ {0, 25, 23, 14, 11, 18, 28, 5, 22, 15, 1, 24, 29, 4, 10, 19, 9, 16, 30, 7, 
2, 27, 21, 12, 31, 6, 8, 17, 20, 13, 3, 26,}, - /* i = 26 */ {0, 23, 11, 28, 22, 1, 29, 10, 9, 30, 2, 21, 31, 8, 20, 3, 18, 5, 25, 14, 4, 19, 15, 24, 27, 12, 16, 7, 13, 26, 6, 17,}, - /* i = 27 */ {0, 11, 22, 29, 9, 2, 31, 20, 18, 25, 4, 15, 27, 16, 13, 6, 1, 10, 23, 28, 8, 3, 30, 21, 19, 24, 5, 14, 26, 17, 12, 7,}, - /* i = 28 */ {0, 22, 9, 31, 18, 4, 27, 13, 1, 23, 8, 30, 19, 5, 26, 12, 2, 20, 11, 29, 16, 6, 25, 15, 3, 21, 10, 28, 17, 7, 24, 14,}, - /* i = 29 */ {0, 9, 18, 27, 1, 8, 19, 26, 2, 11, 16, 25, 3, 10, 17, 24, 4, 13, 22, 31, 5, 12, 23, 30, 6, 15, 20, 29, 7, 14, 21, 28,}, - /* i = 30 */ {0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29,}, - /* i = 31 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,}, -}; -#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)] -RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */ - -#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > - * 0) */ -#endif /* !_RF__RF_INVERTQ_H_ */ diff --git a/sys/dev/raidframe/rf_kintf.h b/sys/dev/raidframe/rf_kintf.h deleted file mode 100644 index ae2697b..0000000 --- a/sys/dev/raidframe/rf_kintf.h +++ /dev/null @@ -1,82 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_kintf.h,v 1.15 2000/10/20 02:24:45 oster Exp $ */ -/* - * rf_kintf.h - * - * RAIDframe exported kernel interface - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_KINTF_H_ -#define _RF__RF_KINTF_H_ - -#include <dev/raidframe/rf_types.h> - -#if defined(__NetBSD__) -#define RF_LTSLEEP(cond, pri, text, time, mutex) \ - ltsleep(cond, pri, text, time, mutex) -#elif defined(__FreeBSD__) -#if __FreeBSD_version > 500005 -#define RF_LTSLEEP(cond, pri, text, time, mutex) \ - msleep(cond, mutex, pri, text, time); -#else -static __inline int -RF_LTSLEEP(void *cond, int pri, const char *text, int time, struct simplelock *mutex) -{ - int ret; - if (mutex != NULL) - simple_unlock(mutex); - ret = tsleep(cond, pri, text, time); - if (mutex != NULL) - simple_lock(mutex); - return (ret); -} -#endif -#endif - -int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); - -void raidstart(RF_Raid_t * raidPtr); -int rf_DispatchKernelIO(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req); - -int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); -int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); - -#define RF_NORMAL_COMPONENT_UPDATE 0 -#define RF_FINAL_COMPONENT_UPDATE 1 -void rf_update_component_labels(RF_Raid_t *, int); -int raidlookup(char *, RF_Thread_t, struct vnode **); -int raidmarkclean(dev_t dev, struct vnode *b_vp, int); -int raidmarkdirty(dev_t dev, struct vnode *b_vp, int); -void raid_init_component_label(RF_Raid_t *, RF_ComponentLabel_t *); -void rf_print_component_label(RF_ComponentLabel_t *); -void rf_UnconfigureVnodes( RF_Raid_t * ); -void rf_close_component( RF_Raid_t *, struct vnode *, int); 
-void rf_disk_unbusy(RF_RaidAccessDesc_t *); -int raid_getcomponentsize(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); -#endif /* _RF__RF_KINTF_H_ */ diff --git a/sys/dev/raidframe/rf_layout.c b/sys/dev/raidframe/rf_layout.c deleted file mode 100644 index 53badbd..0000000 --- a/sys/dev/raidframe/rf_layout.c +++ /dev/null @@ -1,492 +0,0 @@ -/* $NetBSD: rf_layout.c,v 1.9 2001/01/27 19:34:43 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* rf_layout.c -- driver code dealing with layout and mapping issues - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_pq.h> -#include <dev/raidframe/rf_declusterPQ.h> -#include <dev/raidframe/rf_raid0.h> -#include <dev/raidframe/rf_raid1.h> -#include <dev/raidframe/rf_raid4.h> -#include <dev/raidframe/rf_raid5.h> -#include <dev/raidframe/rf_states.h> -#if RF_INCLUDE_RAID5_RS > 0 -#include <dev/raidframe/rf_raid5_rotatedspare.h> -#endif /* RF_INCLUDE_RAID5_RS > 0 */ -#if RF_INCLUDE_CHAINDECLUSTER > 0 -#include <dev/raidframe/rf_chaindecluster.h> -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ -#if RF_INCLUDE_INTERDECLUSTER > 0 -#include <dev/raidframe/rf_interdecluster.h> -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ -#if RF_INCLUDE_PARITYLOGGING > 0 -#include <dev/raidframe/rf_paritylogging.h> -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ -#if RF_INCLUDE_EVENODD > 0 -#include <dev/raidframe/rf_evenodd.h> -#endif /* RF_INCLUDE_EVENODD > 0 */ -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_reconutil.h> - -/*********************************************************************** - * - * the layout switch defines all the layouts that are supported. - * fields are: layout ID, init routine, shutdown routine, map - * sector, map parity, identify stripe, dag selection, map stripeid - * to parity stripe id (optional), num faults tolerated, special - * flags. 
- * - ***********************************************************************/ - -static RF_AccessState_t DefaultStates[] = {rf_QuiesceState, - rf_IncrAccessesCountState, - rf_MapState, - rf_LockState, - rf_CreateDAGState, - rf_ExecuteDAGState, - rf_ProcessDAGState, - rf_DecrAccessesCountState, - rf_CleanupState, - rf_LastState}; - -#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p - -/* Note that if you add any new RAID types to this list, that you must - also update the mapsw[] table in the raidctl sources */ - -static RF_LayoutSW_t mapsw[] = { -#if RF_INCLUDE_PARITY_DECLUSTERING > 0 - /* parity declustering */ - {'T', "Parity declustering", - RF_NU( - rf_ConfigureDeclustered, - rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, - rf_IdentifyStripeDeclustered, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersDeclustered, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif - -#if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 - /* parity declustering with distributed sparing */ - {'D', "Distributed sparing parity declustering", - RF_NU( - rf_ConfigureDeclusteredDS, - rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, - rf_IdentifyStripeDeclustered, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersDeclustered, - rf_GetNumSpareRUsDeclustered, rf_InstallSpareTable, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE | RF_BD_DECLUSTERED) - }, -#endif - -#if RF_INCLUDE_DECL_PQ > 0 - /* declustered P+Q */ - {'Q', "Declustered P+Q", - RF_NU( - rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, - rf_IdentifyStripeDeclusteredPQ, - rf_PQDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - 
rf_GetDefaultNumFloatingReconBuffersPQ, - NULL, NULL, - NULL, - rf_VerifyParityBasic, - 2, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_DECL_PQ > 0 */ - -#if RF_INCLUDE_RAID5_RS > 0 - /* RAID 5 with rotated sparing */ - {'R', "RAID Level 5 rotated sparing", - RF_NU( - rf_ConfigureRAID5_RS, - rf_MapSectorRAID5_RS, rf_MapParityRAID5_RS, NULL, - rf_IdentifyStripeRAID5_RS, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID5_RS, - rf_GetDefaultHeadSepLimitRAID5, - rf_GetDefaultNumFloatingReconBuffersRAID5, - rf_GetNumSpareRUsRAID5_RS, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE) - }, -#endif /* RF_INCLUDE_RAID5_RS > 0 */ - -#if RF_INCLUDE_CHAINDECLUSTER > 0 - /* Chained Declustering */ - {'C', "Chained Declustering", - RF_NU( - rf_ConfigureChainDecluster, - rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL, - rf_IdentifyStripeChainDecluster, - rf_RAIDCDagSelect, - rf_MapSIDToPSIDChainDecluster, - NULL, - NULL, - rf_GetNumSpareRUsChainDecluster, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ - -#if RF_INCLUDE_INTERDECLUSTER > 0 - /* Interleaved Declustering */ - {'I', "Interleaved Declustering", - RF_NU( - rf_ConfigureInterDecluster, - rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL, - rf_IdentifyStripeInterDecluster, - rf_RAIDIDagSelect, - rf_MapSIDToPSIDInterDecluster, - rf_GetDefaultHeadSepLimitInterDecluster, - rf_GetDefaultNumFloatingReconBuffersInterDecluster, - rf_GetNumSpareRUsInterDecluster, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE) - }, -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ - -#if RF_INCLUDE_RAID0 > 0 - /* RAID level 0 */ - {'0', "RAID Level 0", - RF_NU( - rf_ConfigureRAID0, - rf_MapSectorRAID0, rf_MapParityRAID0, NULL, - rf_IdentifyStripeRAID0, - rf_RAID0DagSelect, - rf_MapSIDToPSIDRAID0, - NULL, - NULL, - NULL, NULL, - NULL, 
- rf_VerifyParityRAID0, - 0, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID0 > 0 */ - -#if RF_INCLUDE_RAID1 > 0 - /* RAID level 1 */ - {'1', "RAID Level 1", - RF_NU( - rf_ConfigureRAID1, - rf_MapSectorRAID1, rf_MapParityRAID1, NULL, - rf_IdentifyStripeRAID1, - rf_RAID1DagSelect, - rf_MapSIDToPSIDRAID1, - NULL, - NULL, - NULL, NULL, - rf_SubmitReconBufferRAID1, - rf_VerifyParityRAID1, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID1 > 0 */ - -#if RF_INCLUDE_RAID4 > 0 - /* RAID level 4 */ - {'4', "RAID Level 4", - RF_NU( - rf_ConfigureRAID4, - rf_MapSectorRAID4, rf_MapParityRAID4, NULL, - rf_IdentifyStripeRAID4, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID4, - rf_GetDefaultHeadSepLimitRAID4, - rf_GetDefaultNumFloatingReconBuffersRAID4, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID4 > 0 */ - -#if RF_INCLUDE_RAID5 > 0 - /* RAID level 5 */ - {'5', "RAID Level 5", - RF_NU( - rf_ConfigureRAID5, - rf_MapSectorRAID5, rf_MapParityRAID5, NULL, - rf_IdentifyStripeRAID5, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID5, - rf_GetDefaultHeadSepLimitRAID5, - rf_GetDefaultNumFloatingReconBuffersRAID5, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID5 > 0 */ - -#if RF_INCLUDE_EVENODD > 0 - /* Evenodd */ - {'E', "EvenOdd", - RF_NU( - rf_ConfigureEvenOdd, - rf_MapSectorRAID5, rf_MapParityEvenOdd, rf_MapEEvenOdd, - rf_IdentifyStripeEvenOdd, - rf_EODagSelect, - rf_MapSIDToPSIDRAID5, - NULL, - NULL, - NULL, NULL, - NULL, /* no reconstruction, yet */ - rf_VerifyParityEvenOdd, - 2, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_EVENODD > 0 */ - -#if RF_INCLUDE_EVENODD > 0 - /* Declustered Evenodd */ - {'e', "Declustered EvenOdd", - RF_NU( - rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, - rf_IdentifyStripeDeclusteredPQ, - rf_EODagSelect, - 
rf_MapSIDToPSIDRAID5, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersPQ, - NULL, NULL, - NULL, /* no reconstruction, yet */ - rf_VerifyParityEvenOdd, - 2, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_EVENODD > 0 */ - -#if RF_INCLUDE_PARITYLOGGING > 0 - /* parity logging */ - {'L', "Parity logging", - RF_NU( - rf_ConfigureParityLogging, - rf_MapSectorParityLogging, rf_MapParityParityLogging, NULL, - rf_IdentifyStripeParityLogging, - rf_ParityLoggingDagSelect, - rf_MapSIDToPSIDParityLogging, - rf_GetDefaultHeadSepLimitParityLogging, - rf_GetDefaultNumFloatingReconBuffersParityLogging, - NULL, NULL, - rf_SubmitReconBufferBasic, - NULL, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - - /* end-of-list marker */ - {'\0', NULL, - RF_NU( - NULL, - NULL, NULL, NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, NULL, - NULL, - NULL, - 0, - NULL, - 0) - } -}; - -RF_LayoutSW_t * -rf_GetLayout(RF_ParityConfig_t parityConfig) -{ - RF_LayoutSW_t *p; - - /* look up the specific layout */ - for (p = &mapsw[0]; p->parityConfig; p++) - if (p->parityConfig == parityConfig) - break; - if (!p->parityConfig) - return (NULL); - RF_ASSERT(p->parityConfig == parityConfig); - return (p); -} - -/***************************************************************************** - * - * ConfigureLayout -- - * - * read the configuration file and set up the RAID layout parameters. - * After reading common params, invokes the layout-specific - * configuration routine to finish the configuration. 
- * - ****************************************************************************/ -int -rf_ConfigureLayout( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_ParityConfig_t parityConfig; - RF_LayoutSW_t *p; - int retval; - - layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU; - layoutPtr->SUsPerPU = cfgPtr->SUsPerPU; - layoutPtr->SUsPerRU = cfgPtr->SUsPerRU; - parityConfig = cfgPtr->parityConfig; - - if (layoutPtr->sectorsPerStripeUnit <= 0) { - RF_ERRORMSG2("raid%d: Invalid sectorsPerStripeUnit: %d\n", - raidPtr->raidid, - (int)layoutPtr->sectorsPerStripeUnit ); - return (EINVAL); - } - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; - - p = rf_GetLayout(parityConfig); - if (p == NULL) { - RF_ERRORMSG1("Unknown parity configuration '%c'", parityConfig); - return (EINVAL); - } - RF_ASSERT(p->parityConfig == parityConfig); - layoutPtr->map = p; - - /* initialize the specific layout */ - - retval = (p->Configure) (listp, raidPtr, cfgPtr); - - if (retval) - return (retval); - - layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << raidPtr->logBytesPerSector; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - if (rf_forceNumFloatingReconBufs >= 0) { - raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs; - } else { - raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr); - } - - if (rf_forceHeadSepLimit >= 0) { - raidPtr->headSepLimit = rf_forceHeadSepLimit; - } else { - raidPtr->headSepLimit = rf_GetDefaultHeadSepLimit(raidPtr); - } - - printf("RAIDFRAME: Configure (%s): total number of sectors is %lu (%lu MB)\n", - layoutPtr->map->configName, - (unsigned long) raidPtr->totalSectors, - (unsigned long) (raidPtr->totalSectors / 1024 * (1 << raidPtr->logBytesPerSector) / 1024)); - if (raidPtr->headSepLimit >= 0) { - printf("RAIDFRAME(%s): Using %ld 
floating recon bufs with head sep limit %ld\n", - layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs, (long) raidPtr->headSepLimit); - } else { - printf("RAIDFRAME(%s): Using %ld floating recon bufs with no head sep limit\n", - layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs); - } - - return (0); -} -/* typically there is a 1-1 mapping between stripes and parity stripes. - * however, the declustering code supports packing multiple stripes into - * a single parity stripe, so as to increase the size of the reconstruction - * unit without affecting the size of the stripe unit. This routine finds - * the parity stripe identifier associated with a stripe ID. There is also - * a RaidAddressToParityStripeID macro in layout.h - */ -RF_StripeNum_t -rf_MapStripeIDToParityStripeID(layoutPtr, stripeID, which_ru) - RF_RaidLayout_t *layoutPtr; - RF_StripeNum_t stripeID; - RF_ReconUnitNum_t *which_ru; -{ - RF_StripeNum_t parityStripeID; - - /* quick exit in the common case of SUsPerPU==1 */ - if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) { - *which_ru = 0; - return (stripeID); - } else { - (layoutPtr->map->MapSIDToPSID) (layoutPtr, stripeID, &parityStripeID, which_ru); - } - return (parityStripeID); -} diff --git a/sys/dev/raidframe/rf_layout.h b/sys/dev/raidframe/rf_layout.h deleted file mode 100644 index 2482556..0000000 --- a/sys/dev/raidframe/rf_layout.h +++ /dev/null @@ -1,349 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_layout.h,v 1.5 2001/01/26 04:14:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_layout.h -- header file defining layout data structures - */ - -#ifndef _RF__RF_LAYOUT_H_ -#define _RF__RF_LAYOUT_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_alloclist.h> - -#ifndef _KERNEL -#include <stdio.h> -#endif - -/***************************************************************************************** - * - * This structure identifies all layout-specific operations and parameters. 
- * - ****************************************************************************************/ - -typedef struct RF_LayoutSW_s { - RF_ParityConfig_t parityConfig; - const char *configName; - -#ifndef _KERNEL - /* layout-specific parsing */ - int (*MakeLayoutSpecific) (FILE * fp, RF_Config_t * cfgPtr, void *arg); - void *makeLayoutSpecificArg; -#endif /* !KERNEL */ - -#if RF_UTILITY == 0 - /* initialization routine */ - int (*Configure) (RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); - - /* routine to map RAID sector address -> physical (row, col, offset) */ - void (*MapSector) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of parity - * unit */ - void (*MapParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of Q unit */ - void (*MapQ) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t * row, - RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to identify the disks comprising a stripe */ - void (*IdentifyStripe) (RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); - - /* routine to select a dag */ - void (*SelectionFunc) (RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr *); -#if 0 - void (**createFunc) (RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, - RF_RaidAccessFlags_t, - RF_AllocListElem_t *); - -#endif - - /* map a stripe ID to a parity stripe ID. 
This is typically the - * identity mapping */ - void (*MapSIDToPSID) (RF_RaidLayout_t * layoutPtr, RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, RF_ReconUnitNum_t * which_ru); - - /* get default head separation limit (may be NULL) */ - RF_HeadSepLimit_t(*GetDefaultHeadSepLimit) (RF_Raid_t * raidPtr); - - /* get default num recon buffers (may be NULL) */ - int (*GetDefaultNumFloatingReconBuffers) (RF_Raid_t * raidPtr); - - /* get number of spare recon units (may be NULL) */ - RF_ReconUnitCount_t(*GetNumSpareRUs) (RF_Raid_t * raidPtr); - - /* spare table installation (may be NULL) */ - int (*InstallSpareTable) (RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); - - /* recon buffer submission function */ - int (*SubmitReconBuffer) (RF_ReconBuffer_t * rbuf, int keep_it, - int use_committed); - - /* - * verify that parity information for a stripe is correct - * see rf_parityscan.h for return vals - */ - int (*VerifyParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); - - /* number of faults tolerated by this mapping */ - int faultsTolerated; - - /* states to step through in an access. Must end with "LastState". The - * default is DefaultStates in rf_layout.c */ - RF_AccessState_t *states; - - RF_AccessStripeMapFlags_t flags; -#endif /* RF_UTILITY == 0 */ -} RF_LayoutSW_t; -/* enables remapping to spare location under dist sparing */ -#define RF_REMAP 1 -#define RF_DONT_REMAP 0 - -/* - * Flags values for RF_AccessStripeMapFlags_t - */ -#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */ -#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs - * that support it */ -#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */ - -/************************************************************************* - * - * this structure forms the layout component of the main Raid - * structure. 
It describes everything needed to define and perform - * the mapping of logical RAID addresses <-> physical disk addresses. - * - *************************************************************************/ -struct RF_RaidLayout_s { - /* configuration parameters */ - RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one - * stripe unit */ - RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */ - RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction - * unit */ - - /* redundant-but-useful info computed from the above, used in all - * layouts */ - RF_StripeCount_t numStripe; /* total number of stripes in the - * array */ - RF_SectorCount_t dataSectorsPerStripe; - RF_StripeCount_t dataStripeUnitsPerDisk; - u_int bytesPerStripeUnit; - u_int dataBytesPerStripe; - RF_StripeCount_t numDataCol; /* number of SUs of data per stripe - * (name here is a la RAID4) */ - RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. - * Always 1 for now */ - RF_StripeCount_t numParityLogCol; /* number of SUs of parity log - * per stripe. Always 1 for - * now */ - RF_StripeCount_t stripeUnitsPerDisk; - - RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and - * information */ - void *layoutSpecificInfo; /* ptr to a structure holding - * layout-specific params */ -}; -/***************************************************************************************** - * - * The mapping code returns a pointer to a list of AccessStripeMap structures, which - * describes all the mapping information about an access. The list contains one - * AccessStripeMap structure per stripe touched by the access. Each element in the list - * contains a stripe identifier and a pointer to a list of PhysDiskAddr structures. Each - * element in this latter list describes the physical location of a stripe unit accessed - * within the corresponding stripe. 
- * - ****************************************************************************************/ - -#define RF_PDA_TYPE_DATA 0 -#define RF_PDA_TYPE_PARITY 1 -#define RF_PDA_TYPE_Q 2 - -struct RF_PhysDiskAddr_s { - RF_RowCol_t row, col; /* disk identifier */ - RF_SectorNum_t startSector; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors accessed */ - int type; /* used by higher levels: currently, data, - * parity, or q */ - caddr_t bufPtr; /* pointer to buffer supplying/receiving data */ - RF_RaidAddr_t raidAddress; /* raid address corresponding to this - * physical disk address */ - RF_PhysDiskAddr_t *next; -}; -#define RF_MAX_FAILED_PDA RF_MAXCOL - -struct RF_AccessStripeMap_s { - RF_StripeNum_t stripeID;/* the stripe index */ - RF_RaidAddr_t raidAddress; /* the starting raid address within - * this stripe */ - RF_RaidAddr_t endRaidAddress; /* raid address one sector past the - * end of the access */ - RF_SectorCount_t totalSectorsAccessed; /* total num sectors - * identified in physInfo list */ - RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in - * physInfo list */ - int numDataFailed; /* number of failed data disks accessed */ - int numParityFailed;/* number of failed parity disks accessed (0 - * or 1) */ - int numQFailed; /* number of failed Q units accessed (0 or 1) */ - RF_AccessStripeMapFlags_t flags; /* various flags */ -#if 0 - RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */ - RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA - * that has failed, if any */ -#else - int numFailedPDAs; /* number of failed phys addrs */ - RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys - * addrs */ -#endif - RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */ - RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the - * parity (P of P + Q ) */ - RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of - * P + Q */ - RF_LockReqDesc_t 
lockReqDesc; /* used for stripe locking */ - RF_RowCol_t origRow; /* the original row: we may redirect the acc - * to a different row */ - RF_AccessStripeMap_t *next; -}; -/* flag values */ -#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* allows large-write creation - * code to redirect failed - * accs */ -#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* allows us to detect - * recursive calls to the - * bailout write dag */ -#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* we've acquired the lock on - * the first parity range in - * this parity stripe */ -#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* we've acquired the lock on - * the 2nd parity range in - * this parity stripe */ -#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* we've done the force-recon - * call on this parity stripe */ -#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* we blocked recon => we must - * unblock it later */ - -struct RF_AccessStripeMapHeader_s { - RF_StripeCount_t numStripes; /* total number of stripes touched by - * this acc */ - RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. - * Also used for making lists */ - RF_AccessStripeMapHeader_t *next; -}; -/***************************************************************************************** - * - * various routines mapping addresses in the RAID address space. These work across - * all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE. 
- * - ****************************************************************************************/ - -/* return the identifier of the stripe containing the given address */ -#define rf_RaidAddressToStripeID(_layoutPtr_, _addr_) \ - ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) / (_layoutPtr_)->numDataCol ) - -/* return the raid address of the start of the indicated stripe ID */ -#define rf_StripeIDToRaidAddress(_layoutPtr_, _sid_) \ - ( ((_sid_) * (_layoutPtr_)->sectorsPerStripeUnit) * (_layoutPtr_)->numDataCol ) - -/* return the identifier of the stripe containing the given stripe unit id */ -#define rf_StripeUnitIDToStripeID(_layoutPtr_, _addr_) \ - ( (_addr_) / (_layoutPtr_)->numDataCol ) - -/* return the identifier of the stripe unit containing the given address */ -#define rf_RaidAddressToStripeUnitID(_layoutPtr_, _addr_) \ - ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) ) - -/* return the RAID address of next stripe boundary beyond the given address */ -#define rf_RaidAddressOfNextStripeBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+1) * (_layoutPtr_)->dataSectorsPerStripe ) - -/* return the RAID address of the start of the stripe containing the given address */ -#define rf_RaidAddressOfPrevStripeBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+0) * (_layoutPtr_)->dataSectorsPerStripe ) - -/* return the RAID address of next stripe unit boundary beyond the given address */ -#define rf_RaidAddressOfNextStripeUnitBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+1L)*(_layoutPtr_)->sectorsPerStripeUnit ) - -/* return the RAID address of the start of the stripe unit containing RAID address _addr_ */ -#define rf_RaidAddressOfPrevStripeUnitBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+0)*(_layoutPtr_)->sectorsPerStripeUnit ) - -/* returns the offset into the stripe. 
used by RaidAddressStripeAligned */ -#define rf_RaidAddressStripeOffset(_layoutPtr_, _addr_) \ - ( (_addr_) % ((_layoutPtr_)->dataSectorsPerStripe) ) - -/* returns the offset into the stripe unit. */ -#define rf_StripeUnitOffset(_layoutPtr_, _addr_) \ - ( (_addr_) % ((_layoutPtr_)->sectorsPerStripeUnit) ) - -/* returns nonzero if the given RAID address is stripe-aligned */ -#define rf_RaidAddressStripeAligned( __layoutPtr__, __addr__ ) \ - ( rf_RaidAddressStripeOffset(__layoutPtr__, __addr__) == 0 ) - -/* returns nonzero if the given address is stripe-unit aligned */ -#define rf_StripeUnitAligned( __layoutPtr__, __addr__ ) \ - ( rf_StripeUnitOffset(__layoutPtr__, __addr__) == 0 ) - -/* convert an address expressed in RAID blocks to/from an addr expressed in bytes */ -#define rf_RaidAddressToByte(_raidPtr_, _addr_) \ - ( (_addr_) << ( (_raidPtr_)->logBytesPerSector ) ) - -#define rf_ByteToRaidAddress(_raidPtr_, _addr_) \ - ( (_addr_) >> ( (_raidPtr_)->logBytesPerSector ) ) - -/* convert a raid address to/from a parity stripe ID. Conversion to raid address is easy, - * since we're asking for the address of the first sector in the parity stripe. Conversion to a - * parity stripe ID is more complex, since stripes are not contiguously allocated in - * parity stripes. 
- */ -#define rf_RaidAddressToParityStripeID(_layoutPtr_, _addr_, _ru_num_) \ - rf_MapStripeIDToParityStripeID( (_layoutPtr_), rf_RaidAddressToStripeID( (_layoutPtr_), (_addr_) ), (_ru_num_) ) - -#define rf_ParityStripeIDToRaidAddress(_layoutPtr_, _psid_) \ - ( (_psid_) * (_layoutPtr_)->SUsPerPU * (_layoutPtr_)->numDataCol * (_layoutPtr_)->sectorsPerStripeUnit ) - -RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig); -int -rf_ConfigureLayout(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_StripeNum_t -rf_MapStripeIDToParityStripeID(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_ReconUnitNum_t * which_ru); - -#endif /* !_RF__RF_LAYOUT_H_ */ diff --git a/sys/dev/raidframe/rf_map.c b/sys/dev/raidframe/rf_map.c deleted file mode 100644 index 22af549..0000000 --- a/sys/dev/raidframe/rf_map.c +++ /dev/null @@ -1,909 +0,0 @@ -/* $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************** - * - * map.c -- main code for mapping RAID addresses to physical disk addresses - * - **************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_shutdown.h> - -static void rf_FreePDAList(RF_PhysDiskAddr_t * start, RF_PhysDiskAddr_t * end, int count); -static void -rf_FreeASMList(RF_AccessStripeMap_t * start, RF_AccessStripeMap_t * end, - int count); - -/***************************************************************************************** - * - * MapAccess -- main 1st order mapping routine. - * - * Maps an access in the RAID address space to the corresponding set of physical disk - * addresses. The result is returned as a list of AccessStripeMap structures, one per - * stripe accessed. Each ASM structure contains a pointer to a list of PhysDiskAddr - * structures, which describe the physical locations touched by the user access. Note - * that this routine returns only static mapping information, i.e. the list of physical - * addresses returned does not necessarily identify the set of physical locations that - * will actually be read or written. - * - * The routine also maps the parity. The physical disk location returned always - * indicates the entire parity unit, even when only a subset of it is being accessed. 
- * This is because an access that is not stripe unit aligned but that spans a stripe - * unit boundary may require access to two distinct portions of the parity unit, and we - * can't yet tell which portion(s) we'll actually need. We leave it up to the algorithm - * selection code to decide what subset of the parity unit to access. - * - * Note that addresses in the RAID address space must always be maintained as - * longs, instead of ints. - * - * This routine returns NULL if numBlocks is 0 - * - ****************************************************************************************/ - -RF_AccessStripeMapHeader_t * -rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddress; /* starting address in RAID address - * space */ - RF_SectorCount_t numBlocks; /* number of blocks in RAID address - * space to access */ - caddr_t buffer; /* buffer to supply/receive data */ - int remap; /* 1 => remap addresses to spare space */ -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_AccessStripeMapHeader_t *asm_hdr = NULL; - RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL; - int faultsTolerated = layoutPtr->map->faultsTolerated; - RF_RaidAddr_t startAddress = raidAddress; /* we'll change - * raidAddress along the - * way */ - RF_RaidAddr_t endAddress = raidAddress + numBlocks; - RF_RaidDisk_t **disks = raidPtr->Disks; - - RF_PhysDiskAddr_t *pda_p, *pda_q; - RF_StripeCount_t numStripes = 0; - RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress; - RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr; - RF_StripeCount_t totStripes; - RF_StripeNum_t stripeID, lastSID, SUID, lastSUID; - RF_AccessStripeMap_t *asmList, *t_asm; - RF_PhysDiskAddr_t *pdaList, *t_pda; - - /* allocate all the ASMs and PDAs up front */ - lastRaidAddr = raidAddress + numBlocks - 1; - stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress); - lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr); - totStripes = 
lastSID - stripeID + 1; - SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress); - lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr); - - asmList = rf_AllocASMList(totStripes); - pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) - * per stripe for parity */ - - if (raidAddress + numBlocks > raidPtr->totalSectors) { - RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n", - (int) raidAddress); - return (NULL); - } - if (rf_mapDebug) - rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks); - for (; raidAddress < endAddress;) { - /* make the next stripe structure */ - RF_ASSERT(asmList); - t_asm = asmList; - asmList = asmList->next; - bzero((char *) t_asm, sizeof(RF_AccessStripeMap_t)); - if (!asm_p) - asm_list = asm_p = t_asm; - else { - asm_p->next = t_asm; - asm_p = asm_p->next; - } - numStripes++; - - /* map SUs from current location to the end of the stripe */ - asm_p->stripeID = /* rf_RaidAddressToStripeID(layoutPtr, - raidAddress) */ stripeID++; - stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); - stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress); - asm_p->raidAddress = raidAddress; - asm_p->endRaidAddress = stripeEndAddress; - - /* map each stripe unit in the stripe */ - pda_p = NULL; - startAddrWithinStripe = raidAddress; /* Raid addr of start of - * portion of access - * that is within this - * stripe */ - for (; raidAddress < stripeEndAddress;) { - RF_ASSERT(pdaList); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - if (!pda_p) - asm_p->physInfo = pda_p = t_pda; - else { - pda_p->next = t_pda; - pda_p = pda_p->next; - } - - pda_p->type = RF_PDA_TYPE_DATA; - (layoutPtr->map->MapSector) (raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - - /* mark any failures we find. 
failedPDA is don't-care - * if there is more than one failure */ - pda_p->raidAddress = raidAddress; /* the RAID address - * corresponding to this - * physical disk address */ - nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress); - pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress; - RF_ASSERT(pda_p->numSector != 0); - rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0); - pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress)); - asm_p->totalSectorsAccessed += pda_p->numSector; - asm_p->numStripeUnitsAccessed++; - asm_p->origRow = pda_p->row; /* redundant but - * harmless to do this - * in every loop - * iteration */ - - raidAddress = RF_MIN(endAddress, nextStripeUnitAddress); - } - - /* Map the parity. At this stage, the startSector and - * numSector fields for the parity unit are always set to - * indicate the entire parity unit. We may modify this after - * mapping the data portion. */ - switch (faultsTolerated) { - case 0: - break; - case 1: /* single fault tolerant */ - RF_ASSERT(pdaList); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_p = asm_p->parityInfo = t_pda; - pda_p->type = RF_PDA_TYPE_PARITY; - (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); - rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); - - break; - case 2: /* two fault tolerant */ - RF_ASSERT(pdaList && pdaList->next); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_p = 
asm_p->parityInfo = t_pda; - pda_p->type = RF_PDA_TYPE_PARITY; - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_q = asm_p->qInfo = t_pda; - pda_q->type = RF_PDA_TYPE_Q; - (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - (layoutPtr->map->MapQ) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap); - pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - /* failure mode stuff */ - rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); - rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1); - rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); - rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); - break; - } - } - RF_ASSERT(asmList == NULL && pdaList == NULL); - /* make the header structure */ - asm_hdr = rf_AllocAccessStripeMapHeader(); - RF_ASSERT(numStripes == totStripes); - asm_hdr->numStripes = numStripes; - asm_hdr->stripeMap = asm_list; - - if (rf_mapDebug) - rf_PrintAccessStripeMap(asm_hdr); - return (asm_hdr); -} -/***************************************************************************************** - * This routine walks through an ASM list and marks the PDAs that have failed. - * It's called only when a disk failure causes an in-flight DAG to fail. - * The parity may consist of two components, but we want to use only one failedPDA - * pointer. 
Thus we set failedPDA to point to the first parity component, and rely - * on the rest of the code to do the right thing with this. - ****************************************************************************************/ - -void -rf_MarkFailuresInASMList(raidPtr, asm_h) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asm_h; -{ - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_AccessStripeMap_t *asmap; - RF_PhysDiskAddr_t *pda; - - for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) { - asmap->numDataFailed = asmap->numParityFailed = asmap->numQFailed = 0; - asmap->numFailedPDAs = 0; - bzero((char *) asmap->failedPDAs, - RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *)); - for (pda = asmap->physInfo; pda; pda = pda->next) { - if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numDataFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - } - pda = asmap->parityInfo; - if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numParityFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - pda = asmap->qInfo; - if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numQFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - } -} -/***************************************************************************************** - * - * DuplicateASM -- duplicates an ASM and returns the new one - * - ****************************************************************************************/ -RF_AccessStripeMap_t * -rf_DuplicateASM(asmap) - RF_AccessStripeMap_t *asmap; -{ - RF_AccessStripeMap_t *new_asm; - RF_PhysDiskAddr_t *pda, *new_pda, *t_pda; - - new_pda = NULL; - new_asm = rf_AllocAccessStripeMapComponent(); - bcopy((char *) asmap, (char *) new_asm, sizeof(RF_AccessStripeMap_t)); - new_asm->numFailedPDAs = 0; /* ??? 
*/ - new_asm->failedPDAs[0] = NULL; - new_asm->physInfo = NULL; - new_asm->parityInfo = NULL; - new_asm->next = NULL; - - for (pda = asmap->physInfo; pda; pda = pda->next) { /* copy the physInfo - * list */ - t_pda = rf_AllocPhysDiskAddr(); - bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - t_pda->next = NULL; - if (!new_asm->physInfo) { - new_asm->physInfo = t_pda; - new_pda = t_pda; - } else { - new_pda->next = t_pda; - new_pda = new_pda->next; - } - if (pda == asmap->failedPDAs[0]) - new_asm->failedPDAs[0] = t_pda; - } - for (pda = asmap->parityInfo; pda; pda = pda->next) { /* copy the parityInfo - * list */ - t_pda = rf_AllocPhysDiskAddr(); - bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - t_pda->next = NULL; - if (!new_asm->parityInfo) { - new_asm->parityInfo = t_pda; - new_pda = t_pda; - } else { - new_pda->next = t_pda; - new_pda = new_pda->next; - } - if (pda == asmap->failedPDAs[0]) - new_asm->failedPDAs[0] = t_pda; - } - return (new_asm); -} -/***************************************************************************************** - * - * DuplicatePDA -- duplicates a PDA and returns the new one - * - ****************************************************************************************/ -RF_PhysDiskAddr_t * -rf_DuplicatePDA(pda) - RF_PhysDiskAddr_t *pda; -{ - RF_PhysDiskAddr_t *new; - - new = rf_AllocPhysDiskAddr(); - bcopy((char *) pda, (char *) new, sizeof(RF_PhysDiskAddr_t)); - return (new); -} -/***************************************************************************************** - * - * routines to allocate and free list elements. All allocation routines zero the - * structure before returning it. - * - * FreePhysDiskAddr is static. It should never be called directly, because - * FreeAccessStripeMap takes care of freeing the PhysDiskAddr list. 
- * - ****************************************************************************************/ - -static RF_FreeList_t *rf_asmhdr_freelist; -#define RF_MAX_FREE_ASMHDR 128 -#define RF_ASMHDR_INC 16 -#define RF_ASMHDR_INITIAL 32 - -static RF_FreeList_t *rf_asm_freelist; -#define RF_MAX_FREE_ASM 192 -#define RF_ASM_INC 24 -#define RF_ASM_INITIAL 64 - -static RF_FreeList_t *rf_pda_freelist; -#define RF_MAX_FREE_PDA 192 -#define RF_PDA_INC 24 -#define RF_PDA_INITIAL 64 - -/* called at shutdown time. So far, all that is necessary is to release all the free lists */ -static void rf_ShutdownMapModule(void *); -static void -rf_ShutdownMapModule(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); - RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); - RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *)); -} - -int -rf_ConfigureMapModule(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR, - RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t)); - if (rf_asmhdr_freelist == NULL) { - return (ENOMEM); - } - RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM, - RF_ASM_INC, sizeof(RF_AccessStripeMap_t)); - if (rf_asm_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); - return (ENOMEM); - } - RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, - RF_PDA_INC, sizeof(RF_PhysDiskAddr_t)); - if (rf_pda_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); - RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); - return (ENOMEM); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownMapModule(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next, - 
(RF_AccessStripeMapHeader_t *)); - RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next, - (RF_AccessStripeMap_t *)); - RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next, - (RF_PhysDiskAddr_t *)); - - return (0); -} - -RF_AccessStripeMapHeader_t * -rf_AllocAccessStripeMapHeader() -{ - RF_AccessStripeMapHeader_t *p; - - RF_FREELIST_GET(rf_asmhdr_freelist, p, next, (RF_AccessStripeMapHeader_t *)); - bzero((char *) p, sizeof(RF_AccessStripeMapHeader_t)); - - return (p); -} - - -void -rf_FreeAccessStripeMapHeader(p) - RF_AccessStripeMapHeader_t *p; -{ - RF_FREELIST_FREE(rf_asmhdr_freelist, p, next); -} - -RF_PhysDiskAddr_t * -rf_AllocPhysDiskAddr() -{ - RF_PhysDiskAddr_t *p; - - RF_FREELIST_GET(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *)); - bzero((char *) p, sizeof(RF_PhysDiskAddr_t)); - - return (p); -} -/* allocates a list of PDAs, locking the free list only once - * when we have to call calloc, we do it one component at a time to simplify - * the process of freeing the list at program shutdown. This should not be - * much of a performance hit, because it should be very infrequently executed. - */ -RF_PhysDiskAddr_t * -rf_AllocPDAList(count) - int count; -{ - RF_PhysDiskAddr_t *p = NULL; - - RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *), count); - return (p); -} - -void -rf_FreePhysDiskAddr(p) - RF_PhysDiskAddr_t *p; -{ - RF_FREELIST_FREE(rf_pda_freelist, p, next); -} - -static void -rf_FreePDAList(l_start, l_end, count) - RF_PhysDiskAddr_t *l_start, *l_end; /* pointers to start and end - * of list */ - int count; /* number of elements in list */ -{ - RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next, (RF_PhysDiskAddr_t *), count); -} - -RF_AccessStripeMap_t * -rf_AllocAccessStripeMapComponent() -{ - RF_AccessStripeMap_t *p; - - RF_FREELIST_GET(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *)); - bzero((char *) p, sizeof(RF_AccessStripeMap_t)); - - return (p); -} -/* this is essentially identical to AllocPDAList. 
I should combine the two. - * when we have to call calloc, we do it one component at a time to simplify - * the process of freeing the list at program shutdown. This should not be - * much of a performance hit, because it should be very infrequently executed. - */ -RF_AccessStripeMap_t * -rf_AllocASMList(count) - int count; -{ - RF_AccessStripeMap_t *p = NULL; - - RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *), count); - return (p); -} - -void -rf_FreeAccessStripeMapComponent(p) - RF_AccessStripeMap_t *p; -{ - RF_FREELIST_FREE(rf_asm_freelist, p, next); -} - -static void -rf_FreeASMList(l_start, l_end, count) - RF_AccessStripeMap_t *l_start, *l_end; - int count; -{ - RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next, (RF_AccessStripeMap_t *), count); -} - -void -rf_FreeAccessStripeMap(hdr) - RF_AccessStripeMapHeader_t *hdr; -{ - RF_AccessStripeMap_t *p, *pt = NULL; - RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL; - int count = 0, t, asm_count = 0; - - for (p = hdr->stripeMap; p; p = p->next) { - - /* link the 3 pda lists into the accumulating pda list */ - - if (!pdaList) - pdaList = p->qInfo; - else - pdaEnd->next = p->qInfo; - for (trailer = NULL, pdp = p->qInfo; pdp;) { - trailer = pdp; - pdp = pdp->next; - count++; - } - if (trailer) - pdaEnd = trailer; - - if (!pdaList) - pdaList = p->parityInfo; - else - pdaEnd->next = p->parityInfo; - for (trailer = NULL, pdp = p->parityInfo; pdp;) { - trailer = pdp; - pdp = pdp->next; - count++; - } - if (trailer) - pdaEnd = trailer; - - if (!pdaList) - pdaList = p->physInfo; - else - pdaEnd->next = p->physInfo; - for (trailer = NULL, pdp = p->physInfo; pdp;) { - trailer = pdp; - pdp = pdp->next; - count++; - } - if (trailer) - pdaEnd = trailer; - - pt = p; - asm_count++; - } - - /* debug only */ - for (t = 0, pdp = pdaList; pdp; pdp = pdp->next) - t++; - RF_ASSERT(t == count); - - if (pdaList) - rf_FreePDAList(pdaList, pdaEnd, count); - rf_FreeASMList(hdr->stripeMap, pt, 
asm_count); - rf_FreeAccessStripeMapHeader(hdr); -} -/* We can't use the large write optimization if there are any failures in the stripe. - * In the declustered layout, there is no way to immediately determine what disks - * constitute a stripe, so we actually have to hunt through the stripe looking for failures. - * The reason we map the parity instead of just using asm->parityInfo->col is because - * the latter may have been already redirected to a spare drive, which would - * mess up the computation of the stripe offset. - * - * ASSUMES AT MOST ONE FAILURE IN THE STRIPE. - */ -int -rf_CheckStripeForFailures(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; -{ - RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t stripeOffset; - int numFailures; - RF_RaidAddr_t sosAddr; - RF_SectorNum_t diskOffset, poffset; - RF_RowCol_t testrow; - - /* quick out in the fault-free case. */ - RF_LOCK_MUTEX(raidPtr->mutex); - numFailures = raidPtr->numFailures; - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (numFailures == 0) - return (0); - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - row = asmap->physInfo->row; - (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &testrow); - (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */ - - /* this need not be true if we've redirected the access to a spare in - * another row RF_ASSERT(row == testrow); */ - stripeOffset = 0; - for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) { - if (diskids[i] != pcol) { - if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) { - if (raidPtr->status[testrow] != rf_rs_reconstructing) - return (1); - RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]); - layoutPtr->map->MapSector(raidPtr, - sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit, - &trow, &tcol, &diskOffset, 0); - 
RF_ASSERT((trow == testrow) && (tcol == diskids[i])); - if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset)) - return (1); - asmap->flags |= RF_ASM_REDIR_LARGE_WRITE; - return (0); - } - stripeOffset++; - } - } - return (0); -} -/* - return the number of failed data units in the stripe. -*/ - -int -rf_NumFailedDataUnitsInStripe(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RowCol_t trow, tcol, row, i; - RF_SectorNum_t diskOffset; - RF_RaidAddr_t sosAddr; - int numFailures; - - /* quick out in the fault-free case. */ - RF_LOCK_MUTEX(raidPtr->mutex); - numFailures = raidPtr->numFailures; - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (numFailures == 0) - return (0); - numFailures = 0; - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - row = asmap->physInfo->row; - for (i = 0; i < layoutPtr->numDataCol; i++) { - (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit, - &trow, &tcol, &diskOffset, 0); - if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status)) - numFailures++; - } - - return numFailures; -} - - -/***************************************************************************************** - * - * debug routines - * - ****************************************************************************************/ - -void -rf_PrintAccessStripeMap(asm_h) - RF_AccessStripeMapHeader_t *asm_h; -{ - rf_PrintFullAccessStripeMap(asm_h, 0); -} - -void -rf_PrintFullAccessStripeMap(asm_h, prbuf) - RF_AccessStripeMapHeader_t *asm_h; - int prbuf; /* flag to print buffer pointers */ -{ - int i; - RF_AccessStripeMap_t *asmap = asm_h->stripeMap; - RF_PhysDiskAddr_t *p; - printf("%d stripes total\n", (int) asm_h->numStripes); - for (; asmap; asmap = asmap->next) { - /* printf("Num failures: %d\n",asmap->numDataFailed); */ - /* printf("Num sectors: - * %d\n",(int)asmap->totalSectorsAccessed); */ - printf("Stripe %d (%d 
sectors), failures: %d data, %d parity: ", - (int) asmap->stripeID, - (int) asmap->totalSectorsAccessed, - (int) asmap->numDataFailed, - (int) asmap->numParityFailed); - if (asmap->parityInfo) { - printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col, - (int) asmap->parityInfo->startSector, - (int) (asmap->parityInfo->startSector + - asmap->parityInfo->numSector - 1)); - if (prbuf) - printf(" b0x%lx", (unsigned long) asmap->parityInfo->bufPtr); - if (asmap->parityInfo->next) { - printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row, - asmap->parityInfo->next->col, - (int) asmap->parityInfo->next->startSector, - (int) (asmap->parityInfo->next->startSector + - asmap->parityInfo->next->numSector - 1)); - if (prbuf) - printf(" b0x%lx", (unsigned long) asmap->parityInfo->next->bufPtr); - RF_ASSERT(asmap->parityInfo->next->next == NULL); - } - printf("]\n\t"); - } - for (i = 0, p = asmap->physInfo; p; p = p->next, i++) { - printf("SU r%d c%d s%d-%d ", p->row, p->col, (int) p->startSector, - (int) (p->startSector + p->numSector - 1)); - if (prbuf) - printf("b0x%lx ", (unsigned long) p->bufPtr); - if (i && !(i & 1)) - printf("\n\t"); - } - printf("\n"); - p = asm_h->stripeMap->failedPDAs[0]; - if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) - printf("[multiple failures]\n"); - else - if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0) - printf("\t[Failed PDA: r%d c%d s%d-%d]\n", p->row, p->col, - (int) p->startSector, (int) (p->startSector + p->numSector - 1)); - } -} - -void -rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_SectorCount_t numBlocks; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - - printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t"); - for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += 
layoutPtr->sectorsPerStripeUnit) { - printf("%d (0x%x), ", (int) ra, (int) ra); - } - printf("\n"); - printf("Offset into stripe unit: %d (0x%x)\n", - (int) (raidAddr % layoutPtr->sectorsPerStripeUnit), - (int) (raidAddr % layoutPtr->sectorsPerStripeUnit)); -} -/* - given a parity descriptor and the starting address within a stripe, - range restrict the parity descriptor to touch only the correct stuff. -*/ -void -rf_ASMParityAdjust( - RF_PhysDiskAddr_t * toAdjust, - RF_StripeNum_t startAddrWithinStripe, - RF_SectorNum_t endAddress, - RF_RaidLayout_t * layoutPtr, - RF_AccessStripeMap_t * asm_p) -{ - RF_PhysDiskAddr_t *new_pda; - - /* when we're accessing only a portion of one stripe unit, we want the - * parity descriptor to identify only the chunk of parity associated - * with the data. When the access spans exactly one stripe unit - * boundary and is less than a stripe unit in size, it uses two - * disjoint regions of the parity unit. When an access spans more - * than one stripe unit boundary, it uses all of the parity unit. - * - * To better handle the case where stripe units are small, we may - * eventually want to change the 2nd case so that if the SU size is - * below some threshold, we just read/write the whole thing instead of - * breaking it up into two accesses. 
*/ - if (asm_p->numStripeUnitsAccessed == 1) { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); - toAdjust->startSector += x; - toAdjust->raidAddress += x; - toAdjust->numSector = asm_p->physInfo->numSector; - RF_ASSERT(toAdjust->numSector != 0); - } else - if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); - - /* create a second pda and copy the parity map info - * into it */ - RF_ASSERT(toAdjust->next == NULL); - new_pda = toAdjust->next = rf_AllocPhysDiskAddr(); - *new_pda = *toAdjust; /* structure assignment */ - new_pda->next = NULL; - - /* adjust the start sector & number of blocks for the - * first parity pda */ - toAdjust->startSector += x; - toAdjust->raidAddress += x; - toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe; - RF_ASSERT(toAdjust->numSector != 0); - - /* adjust the second pda */ - new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress); - /* new_pda->raidAddress = - * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, - * toAdjust->raidAddress); */ - RF_ASSERT(new_pda->numSector != 0); - } -} -/* - Check if a disk has been spared or failed. If spared, - redirect the I/O. - If it has been failed, record it in the asm pointer. - Fourth arg is whether data or parity. 
-*/ -void -rf_ASMCheckStatus( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * pda_p, - RF_AccessStripeMap_t * asm_p, - RF_RaidDisk_t ** disks, - int parity) -{ - RF_DiskStatus_t dstatus; - RF_RowCol_t frow, fcol; - - dstatus = disks[pda_p->row][pda_p->col].status; - - if (dstatus == rf_ds_spared) { - /* if the disk has been spared, redirect access to the spare */ - frow = pda_p->row; - fcol = pda_p->col; - pda_p->row = disks[frow][fcol].spareRow; - pda_p->col = disks[frow][fcol].spareCol; - } else - if (dstatus == rf_ds_dist_spared) { - /* ditto if disk has been spared to dist spare space */ - RF_RowCol_t or = pda_p->row, oc = pda_p->col; - RF_SectorNum_t oo = pda_p->startSector; - - if (pda_p->type == RF_PDA_TYPE_DATA) - raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); - else - raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); - - if (rf_mapDebug) { - printf("Redirected r %d c %d o %d -> r%d c %d o %d\n", or, oc, (int) oo, - pda_p->row, pda_p->col, (int) pda_p->startSector); - } - } else - if (RF_DEAD_DISK(dstatus)) { - /* if the disk is inaccessible, mark the - * failure */ - if (parity) - asm_p->numParityFailed++; - else { - asm_p->numDataFailed++; -#if 0 - /* XXX Do we really want this spewing - * out on the console? 
GO */ - printf("DATA_FAILED!\n"); -#endif - } - asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p; - asm_p->numFailedPDAs++; -#if 0 - switch (asm_p->numParityFailed + asm_p->numDataFailed) { - case 1: - asm_p->failedPDAs[0] = pda_p; - break; - case 2: - asm_p->failedPDAs[1] = pda_p; - default: - break; - } -#endif - } - /* the redirected access should never span a stripe unit boundary */ - RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress) == - rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress + pda_p->numSector - 1)); - RF_ASSERT(pda_p->col != -1); -} diff --git a/sys/dev/raidframe/rf_map.h b/sys/dev/raidframe/rf_map.h deleted file mode 100644 index d7c6d19..0000000 --- a/sys/dev/raidframe/rf_map.h +++ /dev/null @@ -1,94 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_map.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* rf_map.h */ - -#ifndef _RF__RF_MAP_H_ -#define _RF__RF_MAP_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_raid.h> - -/* mapping structure allocation and free routines */ -RF_AccessStripeMapHeader_t * -rf_MapAccess(RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, - caddr_t buffer, int remap); - -void -rf_MarkFailuresInASMList(RF_Raid_t * raidPtr, - RF_AccessStripeMapHeader_t * asm_h); - -RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t * asmap); - -RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t * pda); - -int rf_ConfigureMapModule(RF_ShutdownList_t ** listp); - -RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader(void); - -void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t * p); - -RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr(void); - -RF_PhysDiskAddr_t *rf_AllocPDAList(int count); - -void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t * p); - -RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent(void); - -RF_AccessStripeMap_t *rf_AllocASMList(int count); - -void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t * p); - -void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t * hdr); - -int rf_CheckStripeForFailures(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); - -int rf_NumFailedDataUnitsInStripe(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); - -void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h); - -void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h, int prbuf); - -void -rf_PrintRaidAddressInfo(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_SectorCount_t numBlocks); - -void -rf_ASMParityAdjust(RF_PhysDiskAddr_t * toAdjust, - RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress, - RF_RaidLayout_t * layoutPtr, RF_AccessStripeMap_t * asm_p); - -void -rf_ASMCheckStatus(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda_p, - RF_AccessStripeMap_t * asm_p, RF_RaidDisk_t ** disks, int parity); - 
-#endif /* !_RF__RF_MAP_H_ */ diff --git a/sys/dev/raidframe/rf_mcpair.c b/sys/dev/raidframe/rf_mcpair.c deleted file mode 100644 index 7b327ac..0000000 --- a/sys/dev/raidframe/rf_mcpair.c +++ /dev/null @@ -1,143 +0,0 @@ -/* $NetBSD: rf_mcpair.c,v 1.4 2000/09/11 02:23:14 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_mcpair.c - * an mcpair is a structure containing a mutex and a condition variable. - * it's used to block the current thread until some event occurs. 
- */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_shutdown.h> - -#include <sys/proc.h> - -static RF_FreeList_t *rf_mcpair_freelist; - -#define RF_MAX_FREE_MCPAIR 128 -#define RF_MCPAIR_INC 16 -#define RF_MCPAIR_INITIAL 24 - -static int init_mcpair(RF_MCPair_t *); -static void clean_mcpair(RF_MCPair_t *); -static void rf_ShutdownMCPair(void *); - - - -static int -init_mcpair(t) - RF_MCPair_t *t; -{ - int rc; - - rc = rf_mutex_init(&t->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rc = rf_cond_init(&t->cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&t->mutex); - return (rc); - } - return (0); -} - -static void -clean_mcpair(t) - RF_MCPair_t *t; -{ - rf_mutex_destroy(&t->mutex); - rf_cond_destroy(&t->cond); -} - -static void -rf_ShutdownMCPair(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist, next, (RF_MCPair_t *), clean_mcpair); -} - -int -rf_ConfigureMCPair(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_mcpair_freelist, RF_MAX_FREE_MCPAIR, - RF_MCPAIR_INC, sizeof(RF_MCPair_t)); - rc = rf_ShutdownCreate(listp, rf_ShutdownMCPair, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownMCPair(NULL); - return (rc); - } - RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL, next, - (RF_MCPair_t *), init_mcpair); - return (0); -} - -RF_MCPair_t * -rf_AllocMCPair() -{ - RF_MCPair_t *t; - - RF_FREELIST_GET_INIT(rf_mcpair_freelist, t, next, (RF_MCPair_t *), init_mcpair); - if (t) { - t->flag = 0; - t->next = NULL; - } - return (t); -} - -void -rf_FreeMCPair(t) - RF_MCPair_t *t; -{ - 
RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist, t, next, clean_mcpair); -} -/* the callback function used to wake you up when you use an mcpair to wait for something */ -void -rf_MCPairWakeupFunc(mcpair) - RF_MCPair_t *mcpair; -{ - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 1; - wakeup(&(mcpair->cond)); - RF_UNLOCK_MUTEX(mcpair->mutex); -} diff --git a/sys/dev/raidframe/rf_mcpair.h b/sys/dev/raidframe/rf_mcpair.h deleted file mode 100644 index d43c728..0000000 --- a/sys/dev/raidframe/rf_mcpair.h +++ /dev/null @@ -1,54 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_mcpair.h,v 1.6 2000/09/21 01:45:46 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* rf_mcpair.h - * see comments in rf_mcpair.c - */ - -#ifndef _RF__RF_MCPAIR_H_ -#define _RF__RF_MCPAIR_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -struct RF_MCPair_s { - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) - int flag; - RF_MCPair_t *next; -}; -#define RF_WAIT_MCPAIR(_mcp) \ - RF_LTSLEEP(&((_mcp)->cond), PRIBIO, "mcpair", 0, &((_mcp)->mutex)) - -int rf_ConfigureMCPair(RF_ShutdownList_t ** listp); -RF_MCPair_t *rf_AllocMCPair(void); -void rf_FreeMCPair(RF_MCPair_t * t); -void rf_MCPairWakeupFunc(RF_MCPair_t * t); - -#endif /* !_RF__RF_MCPAIR_H_ */ diff --git a/sys/dev/raidframe/rf_memchunk.c b/sys/dev/raidframe/rf_memchunk.c deleted file mode 100644 index b6e8bd9..0000000 --- a/sys/dev/raidframe/rf_memchunk.c +++ /dev/null @@ -1,213 +0,0 @@ -/* $NetBSD: rf_memchunk.c,v 1.4 1999/08/13 03:41:56 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/********************************************************************************* - * rf_memchunk.c - * - * experimental code. I've found that the malloc and free calls in the DAG - * creation code are very expensive. Since for any given workload the DAGs - * created for different accesses are likely to be similar to each other, the - * amount of memory used for any given DAG data structure is likely to be one - * of a small number of values. For example, in UNIX, all reads and writes will - * be less than 8k and will not span stripe unit boundaries. Thus in the absence - * of failure, the only DAGs that will ever get created are single-node reads - * and single-stripe-unit atomic read-modify-writes. So, I'm very likely to - * be continually asking for chunks of memory equal to the sizes of these two - * DAGs. - * - * This leads to the idea of holding on to these chunks of memory when the DAG is - * freed and then, when a new DAG is created, trying to find such a chunk before - * calling malloc. - * - * the "chunk list" is a list of lists. Each header node contains a size value - * and a pointer to a list of chunk descriptors, each of which holds a pointer - * to a chunk of memory of the indicated size. - * - * There is currently no way to purge memory out of the chunk list. My - * initial thought on this is to have a low-priority thread that wakes up every - * 1 or 2 seconds, purges all the chunks with low reuse counts, and sets all - * the reuse counts to zero. - * - * This whole idea may be bad, since malloc may be able to do this more efficiently. - * It's worth a try, though, and it can be turned off by setting useMemChunks to 0. 
- * - ********************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_shutdown.h> - -typedef struct RF_ChunkHdr_s RF_ChunkHdr_t; -struct RF_ChunkHdr_s { - int size; - RF_ChunkDesc_t *list; - RF_ChunkHdr_t *next; -}; - -static RF_ChunkHdr_t *chunklist, *chunk_hdr_free_list; -static RF_ChunkDesc_t *chunk_desc_free_list; -RF_DECLARE_STATIC_MUTEX(chunkmutex) - static void rf_ShutdownMemChunk(void *); - static RF_ChunkDesc_t *NewMemChunk(int, char *); - - - static void rf_ShutdownMemChunk(ignored) - void *ignored; -{ - RF_ChunkDesc_t *pt, *p; - RF_ChunkHdr_t *hdr, *ht; - - if (rf_memChunkDebug) - printf("Chunklist:\n"); - for (hdr = chunklist; hdr;) { - for (p = hdr->list; p;) { - if (rf_memChunkDebug) - printf("Size %d reuse count %d\n", p->size, p->reuse_count); - pt = p; - p = p->next; - RF_Free(pt->buf, pt->size); - RF_Free(pt, sizeof(*pt)); - } - ht = hdr; - hdr = hdr->next; - RF_Free(ht, sizeof(*ht)); - } - - rf_mutex_destroy(&chunkmutex); -} - -int -rf_ConfigureMemChunk(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - chunklist = NULL; - chunk_hdr_free_list = NULL; - chunk_desc_free_list = NULL; - rc = rf_mutex_init(&chunkmutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownMemChunk, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&chunkmutex); - } - return (rc); -} -/* called to get a chunk descriptor for a newly-allocated chunk of memory - * MUTEX MUST BE LOCKED - * - * free list is not currently used - */ -static RF_ChunkDesc_t * -NewMemChunk(size, buf) - int size; - char 
*buf; -{ - RF_ChunkDesc_t *p; - - if (chunk_desc_free_list) { - p = chunk_desc_free_list; - chunk_desc_free_list = p->next; - } else - RF_Malloc(p, sizeof(RF_ChunkDesc_t), (RF_ChunkDesc_t *)); - p->size = size; - p->buf = buf; - p->next = NULL; - p->reuse_count = 0; - return (p); -} -/* looks for a chunk of memory of acceptable size. If none, allocates one and returns - * a chunk descriptor for it, but does not install anything in the list. This is done - * when the chunk is released. - */ -RF_ChunkDesc_t * -rf_GetMemChunk(size) - int size; -{ - RF_ChunkHdr_t *hdr = chunklist; - RF_ChunkDesc_t *p = NULL; - char *buf; - - RF_LOCK_MUTEX(chunkmutex); - for (hdr = chunklist; hdr; hdr = hdr->next) - if (hdr->size >= size) { - p = hdr->list; - if (p) { - hdr->list = p->next; - p->next = NULL; - p->reuse_count++; - } - break; - } - if (!p) { - RF_Malloc(buf, size, (char *)); - p = NewMemChunk(size, buf); - } - RF_UNLOCK_MUTEX(chunkmutex); - (void) bzero(p->buf, size); - return (p); -} - -void -rf_ReleaseMemChunk(chunk) - RF_ChunkDesc_t *chunk; -{ - RF_ChunkHdr_t *hdr, *ht = NULL, *new; - - RF_LOCK_MUTEX(chunkmutex); - for (hdr = chunklist; hdr && hdr->size < chunk->size; ht = hdr, hdr = hdr->next); - if (hdr && hdr->size == chunk->size) { - chunk->next = hdr->list; - hdr->list = chunk; - } else { - RF_Malloc(new, sizeof(RF_ChunkHdr_t), (RF_ChunkHdr_t *)); - new->size = chunk->size; - new->list = chunk; - chunk->next = NULL; - if (ht) { - new->next = ht->next; - ht->next = new; - } else { - new->next = hdr; - chunklist = new; - } - } - RF_UNLOCK_MUTEX(chunkmutex); -} diff --git a/sys/dev/raidframe/rf_memchunk.h b/sys/dev/raidframe/rf_memchunk.h deleted file mode 100644 index 5806d20..0000000 --- a/sys/dev/raidframe/rf_memchunk.h +++ /dev/null @@ -1,48 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_memchunk.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for rf_memchunk.c. See comments there */ - -#ifndef _RF__RF_MEMCHUNK_H_ -#define _RF__RF_MEMCHUNK_H_ - -#include <dev/raidframe/rf_types.h> - -struct RF_ChunkDesc_s { - int size; - int reuse_count; - char *buf; - RF_ChunkDesc_t *next; -}; - -int rf_ConfigureMemChunk(RF_ShutdownList_t ** listp); -RF_ChunkDesc_t *rf_GetMemChunk(int size); -void rf_ReleaseMemChunk(RF_ChunkDesc_t * chunk); - -#endif /* !_RF__RF_MEMCHUNK_H_ */ diff --git a/sys/dev/raidframe/rf_nwayxor.c b/sys/dev/raidframe/rf_nwayxor.c deleted file mode 100644 index 170db6a..0000000 --- a/sys/dev/raidframe/rf_nwayxor.c +++ /dev/null @@ -1,451 +0,0 @@ -/* $NetBSD: rf_nwayxor.c,v 1.4 2000/03/30 12:45:41 augustss Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland, Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************ - * - * nwayxor.c -- code to do N-way xors for reconstruction - * - * nWayXorN xors N input buffers into the destination buffer. - * adapted from danner's longword_bxor code. 
- * - ************************************************************/ - -#include <dev/raidframe/rf_nwayxor.h> -#include <dev/raidframe/rf_shutdown.h> - -static int callcount[10]; -static void rf_ShutdownNWayXor(void *); - -static void -rf_ShutdownNWayXor(ignored) - void *ignored; -{ - int i; - - if (rf_showXorCallCounts == 0) - return; - printf("Call counts for n-way xor routines: "); - for (i = 0; i < 10; i++) - printf("%d ", callcount[i]); - printf("\n"); -} - -int -rf_ConfigureNWayXor(listp) - RF_ShutdownList_t **listp; -{ - int i, rc; - - for (i = 0; i < 10; i++) - callcount[i] = 0; - rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); - return (rc); -} - -void -rf_nWayXor1(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *src = (unsigned long *) src_rbs[0]->buffer; - unsigned long *dest = (unsigned long *) dest_rb->buffer; - unsigned long *end = src + len; - unsigned long d0, d1, d2, d3, s0, s1, s2, s3; - - callcount[1]++; - while (len >= 4) { - d0 = dest[0]; - d1 = dest[1]; - d2 = dest[2]; - d3 = dest[3]; - s0 = src[0]; - s1 = src[1]; - s2 = src[2]; - s3 = src[3]; - dest[0] = d0 ^ s0; - dest[1] = d1 ^ s1; - dest[2] = d2 ^ s2; - dest[3] = d3 ^ s3; - src += 4; - dest += 4; - len -= 4; - } - while (src < end) { - *dest++ ^= *src++; - } -} - -void -rf_nWayXor2(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *a = dst; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[2]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ = *a++ ^ *b++ ^ *c++; - len--; - } - while (len > 4) { - a0 = a[0]; - len -= 4; - - a1 = a[1]; - a2 = a[2]; - - a3 = a[3]; - a += 4; - - b0 = b[0]; - b1 = b[1]; - - b2 = b[2]; - b3 = b[3]; - /* 
start dual issue */ - a0 ^= b0; - b0 = c[0]; - - b += 4; - a1 ^= b1; - - a2 ^= b2; - a3 ^= b3; - - b1 = c[1]; - a0 ^= b0; - - b2 = c[2]; - a1 ^= b1; - - b3 = c[3]; - a2 ^= b2; - - dst[0] = a0; - a3 ^= b3; - dst[1] = a1; - c += 4; - dst[2] = a2; - dst[3] = a3; - dst += 4; - } - while (len) { - *dst++ = *a++ ^ *b++ ^ *c++; - len--; - } -} -/* note that first arg is not incremented but 2nd arg is */ -#define LOAD_FIRST(_dst,_b) \ - a0 = _dst[0]; len -= 4; \ - a1 = _dst[1]; \ - a2 = _dst[2]; \ - a3 = _dst[3]; \ - b0 = _b[0]; \ - b1 = _b[1]; \ - b2 = _b[2]; \ - b3 = _b[3]; _b += 4; - -/* note: arg is incremented */ -#define XOR_AND_LOAD_NEXT(_n) \ - a0 ^= b0; b0 = _n[0]; \ - a1 ^= b1; b1 = _n[1]; \ - a2 ^= b2; b2 = _n[2]; \ - a3 ^= b3; b3 = _n[3]; \ - _n += 4; - -/* arg is incremented */ -#define XOR_AND_STORE(_dst) \ - a0 ^= b0; _dst[0] = a0; \ - a1 ^= b1; _dst[1] = a1; \ - a2 ^= b2; _dst[2] = a2; \ - a3 ^= b3; _dst[3] = a3; \ - _dst += 4; - - -void -rf_nWayXor3(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[3]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++; - len--; - } -} - -void -rf_nWayXor4(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned 
long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[4]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; - len--; - } -} - -void -rf_nWayXor5(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[5]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; - len--; - } -} - -void -rf_nWayXor6(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - 
callcount[6]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; - len--; - } -} - -void -rf_nWayXor7(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[7]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; - len--; - } -} - -void -rf_nWayXor8(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) 
src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - unsigned long *i = (unsigned long *) src_rbs[7]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[8]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_LOAD_NEXT(i); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; - len--; - } -} - - -void -rf_nWayXor9(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - unsigned long *i = (unsigned long *) src_rbs[7]->buffer; - unsigned long *j = (unsigned long *) src_rbs[8]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[9]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_LOAD_NEXT(i); - XOR_AND_LOAD_NEXT(j); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ 
*d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; - len--; - } -} diff --git a/sys/dev/raidframe/rf_nwayxor.h b/sys/dev/raidframe/rf_nwayxor.h deleted file mode 100644 index 1460d9b..0000000 --- a/sys/dev/raidframe/rf_nwayxor.h +++ /dev/null @@ -1,54 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_nwayxor.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ -/* - * rf_nwayxor.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ -/* - * rf_nwayxor.h -- types and prototypes for nwayxor module - */ - -#ifndef _RF__RF_NWAYXOR_H_ -#define _RF__RF_NWAYXOR_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_reconstruct.h> - -int rf_ConfigureNWayXor(RF_ShutdownList_t ** listp); -void rf_nWayXor1(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor2(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor3(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor4(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor5(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor6(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor7(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor8(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor9(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); - -#endif /* !_RF__RF_NWAYXOR_H_ */ diff --git a/sys/dev/raidframe/rf_options.c b/sys/dev/raidframe/rf_options.c deleted file mode 100644 index 107c509..0000000 --- a/sys/dev/raidframe/rf_options.c +++ /dev/null @@ -1,78 +0,0 @@ -/* $NetBSD: rf_options.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_options.c - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> - -#ifdef RF_DBG_OPTION -#undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) long rf_##_option_ = _defval_; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) long rf_/**/_option_ = _defval_; -#endif /* __STDC__ */ - -#include <dev/raidframe/rf_optnames.h> - -#undef RF_DBG_OPTION - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_##_option_ }, -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_/**/_option_ }, -#endif /* __STDC__ */ - -RF_DebugName_t rf_debugNames[] = { -#include <dev/raidframe/rf_optnames.h> - {NULL, NULL} -}; -#undef RF_DBG_OPTION - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) rf_##_option_ = _defval_ ; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) rf_/**/_option_ = _defval_ ; -#endif /* __STDC__ */ - -void -rf_ResetDebugOptions() -{ -#include <dev/raidframe/rf_optnames.h> -} diff --git a/sys/dev/raidframe/rf_options.h b/sys/dev/raidframe/rf_options.h deleted file mode 100644 index 22b6341..0000000 --- a/sys/dev/raidframe/rf_options.h +++ /dev/null @@ -1,58 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_options.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ -/* - * rf_options.h - */ -/* - * Copyright (c) 1996 
Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_OPTIONS_H_ -#define _RF__RF_OPTIONS_H_ - -#define RF_DEFAULT_LOCK_TABLE_SIZE 256 - -typedef struct RF_DebugNames_s { - char *name; - long *ptr; -} RF_DebugName_t; - -extern RF_DebugName_t rf_debugNames[]; - -#ifdef RF_DBG_OPTION -#undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) extern long rf_##_option_; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) extern long rf_/**/_option_; -#endif /* __STDC__ */ -#include <dev/raidframe/rf_optnames.h> - -void rf_ResetDebugOptions(void); - -#endif /* !_RF__RF_OPTIONS_H_ */ diff --git a/sys/dev/raidframe/rf_optnames.h b/sys/dev/raidframe/rf_optnames.h deleted file mode 100644 index f04fbc1..0000000 --- a/sys/dev/raidframe/rf_optnames.h +++ /dev/null @@ -1,105 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_optnames.h,v 1.6 1999/12/07 02:54:08 oster Exp $ */ -/* - * rf_optnames.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. 
- * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */

/*
 * Don't protect against multiple inclusion here- we actually want this.
 *
 * Each entry below has the form RF_DBG_OPTION(name, default value).
 * NOTE(review): the including file presumably defines RF_DBG_OPTION
 * differently for each inclusion (declaration, initialization, name
 * table) -- confirm against the includers before relying on this.
 */

RF_DBG_OPTION(accessDebug, 0)
RF_DBG_OPTION(accessTraceBufSize, 0)
RF_DBG_OPTION(cscanDebug, 0)	/* debug CSCAN sorting */
RF_DBG_OPTION(dagDebug, 0)
RF_DBG_OPTION(debugPrintUseBuffer, 0)
RF_DBG_OPTION(degDagDebug, 0)
RF_DBG_OPTION(disableAsyncAccs, 0)
RF_DBG_OPTION(diskDebug, 0)
RF_DBG_OPTION(enableAtomicRMW, 0)	/* this debug var enables locking of
					 * the disk arm during small-write
					 * operations.  Setting this variable
					 * to anything other than 0 will
					 * result in deadlock.  (wvcii) */
RF_DBG_OPTION(engineDebug, 0)
RF_DBG_OPTION(fifoDebug, 0)	/* debug fifo queueing */
RF_DBG_OPTION(floatingRbufDebug, 0)
RF_DBG_OPTION(forceHeadSepLimit, -1)
RF_DBG_OPTION(forceNumFloatingReconBufs, -1)	/* wire down number of
						 * extra recon buffers
						 * to use */
RF_DBG_OPTION(keepAccTotals, 0)	/* turn on keep_acc_totals */
RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE)
RF_DBG_OPTION(mapDebug, 0)
RF_DBG_OPTION(maxNumTraces, -1)

RF_DBG_OPTION(memChunkDebug, 0)
RF_DBG_OPTION(memDebug, 0)
RF_DBG_OPTION(memDebugAddress, 0)
RF_DBG_OPTION(numBufsToAccumulate, 1)	/* number of buffers to
					 * accumulate before doing XOR */
RF_DBG_OPTION(prReconSched, 0)
RF_DBG_OPTION(printDAGsDebug, 0)
RF_DBG_OPTION(printStatesDebug, 0)
RF_DBG_OPTION(protectedSectors, 64L)	/* # of sectors at start of
					 * disk to exclude from RAID
					 * address space */
RF_DBG_OPTION(pssDebug, 0)
RF_DBG_OPTION(queueDebug, 0)
RF_DBG_OPTION(quiesceDebug, 0)
RF_DBG_OPTION(raidSectorOffset, 0)	/* added to all incoming sectors to
					 * debug alignment problems */
RF_DBG_OPTION(reconDebug, 0)
RF_DBG_OPTION(reconbufferDebug, 0)
RF_DBG_OPTION(scanDebug, 0)	/* debug SCAN sorting */
RF_DBG_OPTION(showXorCallCounts, 0)	/* show n-way Xor call counts */
RF_DBG_OPTION(shutdownDebug, 0)	/* show shutdown calls */
RF_DBG_OPTION(sizePercentage, 100)
RF_DBG_OPTION(sstfDebug, 0)	/* turn on debugging info for sstf queueing */
RF_DBG_OPTION(stripeLockDebug, 0)
RF_DBG_OPTION(suppressLocksAndLargeWrites, 0)
RF_DBG_OPTION(suppressTraceDelays, 0)
RF_DBG_OPTION(useMemChunks, 1)
RF_DBG_OPTION(validateDAGDebug, 0)
RF_DBG_OPTION(validateVisitedDebug, 1)	/* XXX turn to zero by
					 * default? */
RF_DBG_OPTION(verifyParityDebug, 0)
RF_DBG_OPTION(debugKernelAccess, 0)	/* DoAccessKernel debugging */

/* Options that only exist when parity logging is compiled in. */
#if RF_INCLUDE_PARITYLOGGING > 0
RF_DBG_OPTION(forceParityLogReint, 0)
RF_DBG_OPTION(numParityRegions, 0)	/* number of regions in the array */
RF_DBG_OPTION(numReintegrationThreads, 1)
RF_DBG_OPTION(parityLogDebug, 0)	/* if nonzero, enables debugging of
					 * parity logging */
RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024)	/* target bytes
							 * available for in-core
							 * logs */
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

diff --git a/sys/dev/raidframe/rf_paritylog.c b/sys/dev/raidframe/rf_paritylog.c deleted file mode 100644 index 87c33e6..0000000 --- a/sys/dev/raidframe/rf_paritylog.c +++ /dev/null @@ -1,871 +0,0 @@ -/* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes.
- */ - -/* Code for manipulating in-core parity logs - * - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -/* - * Append-only log for recording parity "update" and "overwrite" records - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_paritylogging.h> -#include <dev/raidframe/rf_paritylogDiskMgr.h> - -static RF_CommonLogData_t * -AllocParityLogCommonData(RF_Raid_t * raidPtr) -{ - RF_CommonLogData_t *common = NULL; - int rc; - - /* Return a struct for holding common parity log information from the - * free list (rf_parityLogDiskQueue.freeCommonList). If the free list - * is empty, call RF_Malloc to create a new structure. 
NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (raidPtr->parityLogDiskQueue.freeCommonList) { - common = raidPtr->parityLogDiskQueue.freeCommonList; - raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } else { - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); - rc = rf_mutex_init(&common->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_Free(common, sizeof(RF_CommonLogData_t)); - common = NULL; - } - } - common->next = NULL; - return (common); -} - -static void -FreeParityLogCommonData(RF_CommonLogData_t * common) -{ - RF_Raid_t *raidPtr; - - /* Insert a single struct for holding parity log information (data) - * into the free list (rf_parityLogDiskQueue.freeCommonList). - * NON-BLOCKING */ - - raidPtr = common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - common->next = raidPtr->parityLogDiskQueue.freeCommonList; - raidPtr->parityLogDiskQueue.freeCommonList = common; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -static RF_ParityLogData_t * -AllocParityLogData(RF_Raid_t * raidPtr) -{ - RF_ParityLogData_t *data = NULL; - - /* Return a struct for holding parity log information from the free - * list (rf_parityLogDiskQueue.freeList). If the free list is empty, - * call RF_Malloc to create a new structure. 
NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (raidPtr->parityLogDiskQueue.freeDataList) { - data = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } else { - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); - } - data->next = NULL; - data->prev = NULL; - return (data); -} - - -static void -FreeParityLogData(RF_ParityLogData_t * data) -{ - RF_ParityLogData_t *nextItem; - RF_Raid_t *raidPtr; - - /* Insert a linked list of structs for holding parity log information - * (data) into the free list (parityLogDiskQueue.freeList). - * NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while (data) { - nextItem = data->next; - data->next = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = data; - data = nextItem; - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - - -static void -EnqueueParityLogData( - RF_ParityLogData_t * data, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) -{ - RF_Raid_t *raidPtr; - - /* Insert an in-core parity log (*data) into the head of a disk queue - * (*head, *tail). 
NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - if (rf_parityLogDebug) - printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); - RF_ASSERT(data->prev == NULL); - RF_ASSERT(data->next == NULL); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (*head) { - /* insert into head of queue */ - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - data->next = *head; - (*head)->prev = data; - *head = data; - } else { - /* insert into empty list */ - RF_ASSERT(*head == NULL); - RF_ASSERT(*tail == NULL); - *head = data; - *tail = data; - } - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -static RF_ParityLogData_t * -DequeueParityLogData( - RF_Raid_t * raidPtr, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, - int ignoreLocks) -{ - RF_ParityLogData_t *data; - - /* Remove and return an in-core parity log from the tail of a disk - * queue (*head, *tail). 
NON-BLOCKING */ - - /* remove from tail, preserving FIFO order */ - if (!ignoreLocks) - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - data = *tail; - if (data) { - if (*head == *tail) { - /* removing last item from queue */ - *head = NULL; - *tail = NULL; - } else { - *tail = (*tail)->prev; - (*tail)->next = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - data->next = NULL; - data->prev = NULL; - if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); - } - if (*head) { - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - if (!ignoreLocks) - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - return (data); -} - - -static void -RequeueParityLogData( - RF_ParityLogData_t * data, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) -{ - RF_Raid_t *raidPtr; - - /* Insert an in-core parity log (*data) into the tail of a disk queue - * (*head, *tail). 
NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - RF_ASSERT(data); - if (rf_parityLogDebug) - printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (*tail) { - /* append to tail of list */ - data->prev = *tail; - data->next = NULL; - (*tail)->next = data; - *tail = data; - } else { - /* inserting into an empty list */ - *head = data; - *tail = data; - (*head)->prev = NULL; - (*tail)->next = NULL; - } - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -RF_ParityLogData_t * -rf_CreateParityLogData( - RF_ParityRecordType_t operation, - RF_PhysDiskAddr_t * pda, - caddr_t bufPtr, - RF_Raid_t * raidPtr, - int (*wakeFunc) (RF_DagNode_t * node, int status), - void *wakeArg, - RF_AccTraceEntry_t * tracerec, - RF_Etimer_t startTime) -{ - RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; - RF_CommonLogData_t *common; - RF_PhysDiskAddr_t *diskAddress; - int boundary, offset = 0; - - /* Return an initialized struct of info to be logged. Build one item - * per physical disk address, one item per region. 
- * - * NON-BLOCKING */ - - diskAddress = pda; - common = AllocParityLogCommonData(raidPtr); - RF_ASSERT(common); - - common->operation = operation; - common->bufPtr = bufPtr; - common->raidPtr = raidPtr; - common->wakeFunc = wakeFunc; - common->wakeArg = wakeArg; - common->tracerec = tracerec; - common->startTime = startTime; - common->cnt = 0; - - if (rf_parityLogDebug) - printf("[entering CreateParityLogData]\n"); - while (diskAddress) { - common->cnt++; - data = AllocParityLogData(raidPtr); - RF_ASSERT(data); - data->common = common; - data->next = NULL; - data->prev = NULL; - data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); - if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { - /* disk address does not cross a region boundary */ - data->diskAddress = *diskAddress; - data->bufOffset = offset; - offset = offset + diskAddress->numSector; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ - diskAddress = diskAddress->next; - } else { - /* disk address crosses a region boundary */ - /* find address where region is crossed */ - boundary = 0; - while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) - boundary++; - - /* enter data before the boundary */ - data->diskAddress = *diskAddress; - data->diskAddress.numSector = boundary; - data->bufOffset = offset; - offset += boundary; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ - diskAddress->startSector += boundary; - diskAddress->numSector -= boundary; - } - } - if (rf_parityLogDebug) - printf("[leaving CreateParityLogData]\n"); - return (resultHead); -} - - -RF_ParityLogData_t * -rf_SearchAndDequeueParityLogData( - RF_Raid_t * raidPtr, - int regionID, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, - int ignoreLocks) -{ - RF_ParityLogData_t *w; - - /* Remove and return an in-core parity log from a 
specified region - * (regionID). If a matching log is not found, return NULL. - * - * NON-BLOCKING. */ - - /* walk backward through a list, looking for an entry with a matching - * region ID */ - if (!ignoreLocks) - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - w = (*tail); - while (w) { - if (w->regionID == regionID) { - /* remove an element from the list */ - if (w == *tail) { - if (*head == *tail) { - /* removing only element in the list */ - *head = NULL; - *tail = NULL; - } else { - /* removing last item in the list */ - *tail = (*tail)->prev; - (*tail)->next = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - } else { - if (w == *head) { - /* removing first item in the list */ - *head = (*head)->next; - (*head)->prev = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } else { - /* removing an item from the middle of - * the list */ - w->prev->next = w->next; - w->next->prev = w->prev; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - } - w->prev = NULL; - w->next = NULL; - if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); - return (w); - } else - w = w->prev; - } - if (!ignoreLocks) - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - return (NULL); -} - -static RF_ParityLogData_t * -DequeueMatchingLogData( - RF_Raid_t * raidPtr, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) -{ - RF_ParityLogData_t *logDataList, *logData; - int regionID; - - /* Remove and return an in-core parity log from the tail of a disk - * queue (*head, *tail). Then remove all matching (identical - * regionIDs) logData and return as a linked list. 
- * - * NON-BLOCKING */ - - logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); - if (logDataList) { - regionID = logDataList->regionID; - logData = logDataList; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); - while (logData->next) { - logData = logData->next; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); - } - } - return (logDataList); -} - - -static RF_ParityLog_t * -AcquireParityLog( - RF_ParityLogData_t * logData, - int finish) -{ - RF_ParityLog_t *log = NULL; - RF_Raid_t *raidPtr; - - /* Grab a log buffer from the pool and return it. If no buffers are - * available, return NULL. NON-BLOCKING */ - raidPtr = logData->common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - if (raidPtr->parityLogPool.parityLogs) { - log = raidPtr->parityLogPool.parityLogs; - raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; - log->regionID = logData->regionID; - log->numRecords = 0; - log->next = NULL; - raidPtr->logsInUse++; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - } else { - /* no logs available, so place ourselves on the queue of work - * waiting on log buffers this is done while - * parityLogPool.mutex is held, to ensure synchronization with - * ReleaseParityLogs. 
*/ - if (rf_parityLogDebug) - printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); - if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - return (log); -} - -void -rf_ReleaseParityLogs( - RF_Raid_t * raidPtr, - RF_ParityLog_t * firstLog) -{ - RF_ParityLogData_t *logDataList; - RF_ParityLog_t *log, *lastLog; - int cnt; - - /* Insert a linked list of parity logs (firstLog) to the free list - * (parityLogPool.parityLogPool) - * - * NON-BLOCKING. */ - - RF_ASSERT(firstLog); - - /* Before returning logs to global free list, service all requests - * which are blocked on logs. Holding mutexes for parityLogPool and - * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - log = firstLog; - if (firstLog) - firstLog = firstLog->next; - log->numRecords = 0; - log->next = NULL; - while (logDataList && log) { - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); - if (rf_parityLogDebug) - printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); - if (log == NULL) { - log = firstLog; - if (firstLog) { - firstLog = firstLog->next; - log->numRecords = 0; - log->next = NULL; - } - } - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (log) - logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 
- } - /* return remaining logs to pool */ - if (log) { - log->next = firstLog; - firstLog = log; - } - if (firstLog) { - lastLog = firstLog; - raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - while (lastLog->next) { - lastLog = lastLog->next; - raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - } - lastLog->next = raidPtr->parityLogPool.parityLogs; - raidPtr->parityLogPool.parityLogs = firstLog; - cnt = 0; - log = raidPtr->parityLogPool.parityLogs; - while (log) { - cnt++; - log = log->next; - } - RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -static void -ReintLog( - RF_Raid_t * raidPtr, - int regionID, - RF_ParityLog_t * log) -{ - RF_ASSERT(log); - - /* Insert an in-core parity log (log) into the disk queue of - * reintegration work. Set the flag (reintInProgress) for the - * specified region (regionID) to indicate that reintegration is in - * progress for this region. NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint - * complete */ - - if (rf_parityLogDebug) - printf("[requesting reintegration of region %d]\n", log->regionID); - /* move record to reintegration queue */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - log->next = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = log; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); -} - -static void -FlushLog( - RF_Raid_t * raidPtr, - RF_ParityLog_t * log) -{ - /* insert a core log (log) into a list of logs - * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 
- * NON-BLOCKING */ - - RF_ASSERT(log); - RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); - RF_ASSERT(log->next == NULL); - /* move log to flush queue */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - log->next = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = log; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); -} - -static int -DumpParityLogToDisk( - int finish, - RF_ParityLogData_t * logData) -{ - int i, diskCount, regionID = logData->regionID; - RF_ParityLog_t *log; - RF_Raid_t *raidPtr; - - raidPtr = logData->common->raidPtr; - - /* Move a core log to disk. If the log disk is full, initiate - * reintegration. - * - * Return (0) if we can enqueue the dump immediately, otherwise return - * (1) to indicate we are blocked on reintegration and control of the - * thread should be relinquished. - * - * Caller must hold regionInfo[regionID].mutex - * - * NON-BLOCKING */ - - if (rf_parityLogDebug) - printf("[dumping parity log to disk, region %d]\n", regionID); - log = raidPtr->regionInfo[regionID].coreLog; - RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); - RF_ASSERT(log->next == NULL); - - /* if reintegration is in progress, must queue work */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - if (raidPtr->regionInfo[regionID].reintInProgress) { - /* Can not proceed since this region is currently being - * reintegrated. We can not block, so queue remaining work and - * return */ - if (rf_parityLogDebug) - printf("[region %d waiting on reintegration]\n", regionID); - /* XXX not sure about the use of finish - shouldn't this - * always be "Enqueue"? 
*/ - if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); - else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - return (1); /* relenquish control of this thread */ - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - raidPtr->regionInfo[regionID].coreLog = NULL; - if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) - /* IMPORTANT!! this loop bound assumes region disk holds an - * integral number of core logs */ - { - /* update disk map for this region */ - diskCount = raidPtr->regionInfo[regionID].diskCount; - for (i = 0; i < raidPtr->numSectorsPerLog; i++) { - raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; - raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; - } - log->diskOffset = diskCount; - raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; - FlushLog(raidPtr, log); - } else { - /* no room for log on disk, send it to disk manager and - * request reintegration */ - RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); - ReintLog(raidPtr, regionID, log); - } - if (rf_parityLogDebug) - printf("[finished dumping parity log to disk, region %d]\n", regionID); - return (0); -} - -int -rf_ParityLogAppend( - RF_ParityLogData_t * logData, - int finish, - RF_ParityLog_t ** incomingLog, - int clearReintFlag) -{ - int regionID, logItem, itemDone; - RF_ParityLogData_t *item; - int punt, done = RF_FALSE; - RF_ParityLog_t *log; - RF_Raid_t *raidPtr; - RF_Etimer_t timer; - int (*wakeFunc) (RF_DagNode_t * node, int status); - void *wakeArg; - - /* Add parity to the appropriate log, one sector at a time. 
This - * routine is called is called by dag functions ParityLogUpdateFunc - * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. - * - * Parity to be logged is contained in a linked-list (logData). When - * this routine returns, every sector in the list will be in one of - * three places: 1) entered into the parity log 2) queued, waiting on - * reintegration 3) queued, waiting on a core log - * - * Blocked work is passed to the ParityLoggingDiskManager for completion. - * Later, as conditions which required the block are removed, the work - * reenters this routine with the "finish" parameter set to "RF_TRUE." - * - * NON-BLOCKING */ - - RF_ASSERT(logData != NULL); - raidPtr = logData->common->raidPtr; - /* lock the region for the first item in logData */ - regionID = logData->regionID; - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); - - if (clearReintFlag) { - /* Enable flushing for this region. Holding both locks - * provides a synchronization barrier with DumpParityLogToDisk */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); - raidPtr->regionInfo[regionID].diskCount = 0; - raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now - * enabled */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - /* process each item in logData */ - while (logData) { - /* remove an item from logData */ - item = logData; - logData = logData->next; - item->next = NULL; - item->prev = NULL; - - if (rf_parityLogDebug) - printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); - - /* see if we moved to a new region */ - if (regionID != item->regionID) { - 
RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - regionID = item->regionID; - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); - } - punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This - * can happen in one of two ways: 1) no core - * log (AcquireParityLog) 2) waiting on - * reintegration (DumpParityLogToDisk) If punt - * is RF_TRUE, the dataItem was queued, so - * skip to next item. */ - - /* process item, one sector at a time, until all sectors - * processed or we punt */ - if (item->diskAddress.numSector > 0) - done = RF_FALSE; - else - RF_ASSERT(0); - while (!punt && !done) { - /* verify that a core log exists for this region */ - if (!raidPtr->regionInfo[regionID].coreLog) { - /* Attempt to acquire a parity log. If - * acquisition fails, queue remaining work in - * data item and move to nextItem. */ - if (incomingLog) - if (*incomingLog) { - RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; - *incomingLog = NULL; - } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* Note: AcquireParityLog either returns a log - * or enqueues currentItem */ - } - if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* failed to find a core log */ - else { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* verify that the log has room for new - * entries */ - /* if log is full, dump it to disk and grab a - * new log */ - if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { - /* log is full, dump it to disk */ - if (DumpParityLogToDisk(finish, item)) - punt = RF_TRUE; /* dump unsuccessful, - * blocked on - * reintegration */ - else { - /* dump was successful */ - if (incomingLog) - if (*incomingLog) { - 
RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; - *incomingLog = NULL; - } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* if a core log is not - * available, must queue work - * and return */ - if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* blocked on log - * availability */ - } - } - } - /* if we didn't punt on this item, attempt to add a - * sector to the core log */ - if (!punt) { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* at this point, we have a core log with - * enough room for a sector */ - /* copy a sector into the log */ - log = raidPtr->regionInfo[regionID].coreLog; - RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); - logItem = log->numRecords++; - log->records[logItem].parityAddr = item->diskAddress; - RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); - log->records[logItem].parityAddr.numSector = 1; - log->records[logItem].operation = item->common->operation; - bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector)); - item->diskAddress.numSector--; - item->diskAddress.startSector++; - if (item->diskAddress.numSector == 0) - done = RF_TRUE; - } - } - - if (!punt) { - /* Processed this item completely, decrement count of - * items to be processed. 
*/ - RF_ASSERT(item->diskAddress.numSector == 0); - RF_LOCK_MUTEX(item->common->mutex); - item->common->cnt--; - if (item->common->cnt == 0) - itemDone = RF_TRUE; - else - itemDone = RF_FALSE; - RF_UNLOCK_MUTEX(item->common->mutex); - if (itemDone) { - /* Finished processing all log data for this - * IO Return structs to free list and invoke - * wakeup function. */ - timer = item->common->startTime; /* grab initial value of - * timer */ - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); - if (rf_parityLogDebug) - printf("[waking process for region %d]\n", item->regionID); - wakeFunc = item->common->wakeFunc; - wakeArg = item->common->wakeArg; - FreeParityLogCommonData(item->common); - FreeParityLogData(item); - (wakeFunc) (wakeArg, 0); - } else - FreeParityLogData(item); - } - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - if (rf_parityLogDebug) - printf("[exiting ParityLogAppend]\n"); - return (0); -} - - -void -rf_EnableParityLogging(RF_Raid_t * raidPtr) -{ - int regionID; - - for (regionID = 0; regionID < rf_numParityRegions; regionID++) { - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - } - if (rf_parityLogDebug) - printf("[parity logging enabled]\n"); -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylog.h b/sys/dev/raidframe/rf_paritylog.h deleted file mode 100644 index 1f2b80d..0000000 --- a/sys/dev/raidframe/rf_paritylog.h +++ /dev/null @@ -1,181 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_paritylog.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. 
Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */

/* header file for parity log
 *
 * Declares the in-core data structures used by parity logging (log data
 * items, log buffers, per-region state, and the disk-manager queue) and
 * the entry points implemented in rf_paritylog.c.
 */

#ifndef _RF__RF_PARITYLOG_H_
#define _RF__RF_PARITYLOG_H_

#include <dev/raidframe/rf_types.h>

#define RF_DEFAULT_NUM_SECTORS_PER_LOG 64

typedef int RF_RegionId_t;

/* kind of record stored in a parity log */
typedef enum RF_ParityRecordType_e {
	RF_STOP,
	RF_UPDATE,
	RF_OVERWRITE
}       RF_ParityRecordType_t;

/* information shared by all RF_ParityLogData_t items created for one
 * logging request */
struct RF_CommonLogData_s {
	RF_DECLARE_MUTEX(mutex)	/* protects cnt */
	int     cnt;		/* when 0, time to call wakeFunc */
	RF_Raid_t *raidPtr;
/*   int                    (*wakeFunc)(RF_Buf_t); */
	int     (*wakeFunc) (RF_DagNode_t * node, int status);
	void   *wakeArg;	/* first argument passed to wakeFunc */
	RF_AccTraceEntry_t *tracerec;
	RF_Etimer_t startTime;
	caddr_t bufPtr;		/* buffer holding the data to be logged */
	RF_ParityRecordType_t operation;
	RF_CommonLogData_t *next;	/* free-list link */
};

/* one unit of work to be logged; lists are doubly linked through
 * next/prev */
struct RF_ParityLogData_s {
	RF_RegionId_t regionID;	/* this struct guaranteed to span a single
				 * region */
	int     bufOffset;	/* offset from common->bufPtr */
	RF_PhysDiskAddr_t diskAddress;
	RF_CommonLogData_t *common;	/* info shared by one or more
					 * parityLogData structs */
	RF_ParityLogData_t *next;
	RF_ParityLogData_t *prev;
};

struct RF_ParityLogAppendQueue_s {
	RF_DECLARE_MUTEX(mutex)
};

/* one record (one sector) within a parity log */
struct RF_ParityLogRecord_s {
	RF_PhysDiskAddr_t parityAddr;
	RF_ParityRecordType_t operation;
};

/* an in-core parity log buffer */
struct RF_ParityLog_s {
	RF_RegionId_t regionID;
	int     numRecords;	/* number of entries of 'records' in use */
	int     diskOffset;
	RF_ParityLogRecord_t *records;
	caddr_t bufPtr;
	RF_ParityLog_t *next;
};

/* pool of free parity logs */
struct RF_ParityLogQueue_s {
	RF_DECLARE_MUTEX(mutex)
	RF_ParityLog_t *parityLogs;
};

struct RF_RegionBufferQueue_s {
	RF_DECLARE_MUTEX(mutex)
	RF_DECLARE_COND(cond)
	int     bufferSize;
	int     totalBuffers;	/* size of array 'buffers' */
	int     availableBuffers;	/* num available 'buffers' */
	int     emptyBuffersIndex;	/* stick next freed buffer here */
	int     availBuffersIndex;	/* grab next buffer from here */
	caddr_t *buffers;	/* array buffers used to hold parity */
};
/* bit flags for RF_ParityLogDiskQueue_s.threadState */
#define RF_PLOG_CREATED   (1<<0)/* thread is created */
#define RF_PLOG_RUNNING   (1<<1)/* thread is running */
#define RF_PLOG_TERMINATE (1<<2)/* thread is terminated (should exit) */
#define RF_PLOG_SHUTDOWN  (1<<3)/* thread is aware and exiting/exited */

struct RF_ParityLogDiskQueue_s {
	RF_DECLARE_MUTEX(mutex)	/* protects all vars in this struct */
	RF_DECLARE_COND(cond)
	int     threadState;	/* is thread running, should it shutdown  (see
				 * above) */
	RF_ParityLog_t *flushQueue;	/* list of parity logs to be flushed
					 * to log disk */
	RF_ParityLog_t *reintQueue;	/* list of parity logs waiting to be
					 * reintegrated */
	RF_ParityLogData_t *bufHead;	/* head of FIFO list of log data,
					 * waiting on a buffer */
	RF_ParityLogData_t *bufTail;	/* tail of FIFO list of log data,
					 * waiting on a buffer */
	RF_ParityLogData_t *reintHead;	/* head of FIFO list of log data,
					 * waiting on reintegration */
	RF_ParityLogData_t *reintTail;	/* tail of FIFO list of log data,
					 * waiting on reintegration */
	RF_ParityLogData_t *logBlockHead;	/* queue of work, blocked
						 * until a log is available */
	RF_ParityLogData_t *logBlockTail;
	RF_ParityLogData_t *reintBlockHead;	/* queue of work, blocked
						 * until reintegration is
						 * complete */
	RF_ParityLogData_t *reintBlockTail;
	RF_CommonLogData_t *freeCommonList;	/* list of unused common data
						 * structs */
	RF_ParityLogData_t *freeDataList;	/* list of unused log data
						 * structs */
};

/* one entry of a region's in-core disk map */
struct RF_DiskMap_s {
	RF_PhysDiskAddr_t parityAddr;
	RF_ParityRecordType_t operation;
};

struct RF_RegionInfo_s {
	RF_DECLARE_MUTEX(mutex)	/* protects: diskCount, diskMap,
				 * loggingEnabled, coreLog */
	RF_DECLARE_MUTEX(reintMutex)	/* protects: reintInProgress */
	int     reintInProgress;/* flag used to suspend flushing operations */
	RF_SectorCount_t capacity;	/* capacity of this region in sectors */
	RF_SectorNum_t regionStartAddr;	/* starting disk address for this
					 * region */
	RF_SectorNum_t parityStartAddr;	/* starting disk address for this
					 * region.  NOTE(review): same wording
					 * as regionStartAddr; presumably the
					 * first parity sector covered by the
					 * region -- confirm */
	RF_SectorCount_t numSectorsParity;	/* number of parity sectors
						 * protected by this region */
	RF_SectorCount_t diskCount;	/* num of sectors written to this
					 * region's disk log */
	RF_DiskMap_t *diskMap;	/* in-core map of what's in this region's disk
				 * log */
	int     loggingEnabled;	/* logging enable for this region */
	RF_ParityLog_t *coreLog;/* in-core log for this region */
};

/* entry points implemented in rf_paritylog.c */
RF_ParityLogData_t *
rf_CreateParityLogData(RF_ParityRecordType_t operation,
    RF_PhysDiskAddr_t * pda, caddr_t bufPtr, RF_Raid_t * raidPtr,
    int (*wakeFunc) (RF_DagNode_t * node, int status),
    void *wakeArg, RF_AccTraceEntry_t * tracerec,
    RF_Etimer_t startTime);
RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t * raidPtr,
    RF_RegionId_t regionID, RF_ParityLogData_t ** head,
    RF_ParityLogData_t ** tail, int ignoreLocks);
void    rf_ReleaseParityLogs(RF_Raid_t * raidPtr, RF_ParityLog_t * firstLog);
int     rf_ParityLogAppend(RF_ParityLogData_t * logData, int finish,
    RF_ParityLog_t ** incomingLog, int clearReintFlag);
void    rf_EnableParityLogging(RF_Raid_t * raidPtr);

#endif				/* !_RF__RF_PARITYLOG_H_ */
diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.c b/sys/dev/raidframe/rf_paritylogDiskMgr.c deleted file mode 100644 index 5d864e2..0000000 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.c +++ /dev/null @@ -1,703 +0,0 @@ -/* $NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* Code for flushing and reintegration operations related to parity logging. 
- * - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_paritylogging.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_kintf.h> - -#include <dev/raidframe/rf_paritylogDiskMgr.h> - -static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *); - -static caddr_t -AcquireReintBuffer(pool) - RF_RegionBufferQueue_t *pool; -{ - caddr_t bufPtr = NULL; - - /* Return a region buffer from the free list (pool). If the free list - * is empty, WAIT. BLOCKING */ - - RF_LOCK_MUTEX(pool->mutex); - if (pool->availableBuffers > 0) { - bufPtr = pool->buffers[pool->availBuffersIndex]; - pool->availableBuffers--; - pool->availBuffersIndex++; - if (pool->availBuffersIndex == pool->totalBuffers) - pool->availBuffersIndex = 0; - RF_UNLOCK_MUTEX(pool->mutex); - } else { - RF_PANIC(); /* should never happen in correct config, - * single reint */ - RF_WAIT_COND(pool->cond, pool->mutex); - } - return (bufPtr); -} - -static void -ReleaseReintBuffer( - RF_RegionBufferQueue_t * pool, - caddr_t bufPtr) -{ - /* Insert a region buffer (bufPtr) into the free list (pool). 
- * NON-BLOCKING */ - - RF_LOCK_MUTEX(pool->mutex); - pool->availableBuffers++; - pool->buffers[pool->emptyBuffersIndex] = bufPtr; - pool->emptyBuffersIndex++; - if (pool->emptyBuffersIndex == pool->totalBuffers) - pool->emptyBuffersIndex = 0; - RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); - RF_UNLOCK_MUTEX(pool->mutex); - RF_SIGNAL_COND(pool->cond); -} - - - -static void -ReadRegionLog( - RF_RegionId_t regionID, - RF_MCPair_t * rrd_mcpair, - caddr_t regionBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** rrd_dag_h, - RF_AllocListElem_t ** rrd_alloclist, - RF_PhysDiskAddr_t ** rrd_pda) -{ - /* Initiate the read of a region log from disk. Once initiated, return - * to the calling routine. - * - * NON-BLOCKING */ - - RF_AccTraceEntry_t *tracerec; - RF_DagNode_t *rrd_rdNode; - - /* create DAG to read region log from disk */ - rf_MakeAllocList(*rrd_alloclist); - *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, - rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rrl", *rrd_alloclist, - RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for the region log */ - /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *rrd_pda = rf_AllocPDAList(1); - rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), - &((*rrd_pda)->col), &((*rrd_pda)->startSector)); - (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; - - if ((*rrd_pda)->next) { - (*rrd_pda)->next = NULL; - printf("set rrd_pda->next to NULL\n"); - } - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*rrd_dag_h)->tracerec = tracerec; - rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; - rrd_rdNode->params[0].p = *rrd_pda; -/* rrd_rdNode->params[1] = regionBuffer; */ - rrd_rdNode->params[2].v = 0; - rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - - /* launch region log read dag */ - 
rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) rrd_mcpair); -} - - - -static void -WriteCoreLog( - RF_ParityLog_t * log, - RF_MCPair_t * fwr_mcpair, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** fwr_dag_h, - RF_AllocListElem_t ** fwr_alloclist, - RF_PhysDiskAddr_t ** fwr_pda) -{ - RF_RegionId_t regionID = log->regionID; - RF_AccTraceEntry_t *tracerec; - RF_SectorNum_t regionOffset; - RF_DagNode_t *fwr_wrNode; - - /* Initiate the write of a core log to a region log disk. Once - * initiated, return to the calling routine. - * - * NON-BLOCKING */ - - /* create DAG to write a core log to a region log disk */ - rf_MakeAllocList(*fwr_alloclist); - *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for the region log */ - /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *fwr_pda = rf_AllocPDAList(1); - regionOffset = log->diskOffset; - rf_MapLogParityLogging(raidPtr, regionID, regionOffset, - &((*fwr_pda)->row), &((*fwr_pda)->col), - &((*fwr_pda)->startSector)); - (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; - - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*fwr_dag_h)->tracerec = tracerec; - fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; - fwr_wrNode->params[0].p = *fwr_pda; -/* fwr_wrNode->params[1] = log->bufPtr; */ - fwr_wrNode->params[2].v = 0; - fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - - /* launch the dag to write the core log to disk */ - rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) fwr_mcpair); -} - - -static void -ReadRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t * prd_mcpair, - caddr_t parityBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** 
prd_dag_h, - RF_AllocListElem_t ** prd_alloclist, - RF_PhysDiskAddr_t ** prd_pda) -{ - /* Initiate the read of region parity from disk. Once initiated, return - * to the calling routine. - * - * NON-BLOCKING */ - - RF_AccTraceEntry_t *tracerec; - RF_DagNode_t *prd_rdNode; - - /* create DAG to read region parity from disk */ - rf_MakeAllocList(*prd_alloclist); - *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, - rf_DiskReadUndoFunc, "Rrp", - *prd_alloclist, RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *prd_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), - &((*prd_pda)->col), &((*prd_pda)->startSector), - &((*prd_pda)->numSector)); - if (rf_parityLogDebug) - printf("[reading %d sectors of parity from region %d]\n", - (int) (*prd_pda)->numSector, regionID); - if ((*prd_pda)->next) { - (*prd_pda)->next = NULL; - printf("set prd_pda->next to NULL\n"); - } - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*prd_dag_h)->tracerec = tracerec; - prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; - prd_rdNode->params[0].p = *prd_pda; - prd_rdNode->params[1].p = parityBuffer; - prd_rdNode->params[2].v = 0; - prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - if (rf_validateDAGDebug) - rf_ValidateDAG(*prd_dag_h); - /* launch region parity read dag */ - rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) prd_mcpair); -} - -static void -WriteRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t * pwr_mcpair, - caddr_t parityBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** pwr_dag_h, - RF_AllocListElem_t ** pwr_alloclist, - RF_PhysDiskAddr_t ** pwr_pda) -{ - /* Initiate the write of region parity to disk. 
Once initiated, return - * to the calling routine. - * - * NON-BLOCKING */ - - RF_AccTraceEntry_t *tracerec; - RF_DagNode_t *pwr_wrNode; - - /* create DAG to write region parity to disk */ - rf_MakeAllocList(*pwr_alloclist); - *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wrp", *pwr_alloclist, - RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *pwr_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), - &((*pwr_pda)->col), &((*pwr_pda)->startSector), - &((*pwr_pda)->numSector)); - - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*pwr_dag_h)->tracerec = tracerec; - pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; - pwr_wrNode->params[0].p = *pwr_pda; -/* pwr_wrNode->params[1] = parityBuffer; */ - pwr_wrNode->params[2].v = 0; - pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - - /* launch the dag to write region parity to disk */ - rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) pwr_mcpair); -} - -static void -FlushLogsToDisk( - RF_Raid_t * raidPtr, - RF_ParityLog_t * logList) -{ - /* Flush a linked list of core logs to the log disk. Logs contain the - * disk location where they should be written. Logs were written in - * FIFO order and that order must be preserved. 
- * - * Recommended optimizations: 1) allow multiple flushes to occur - * simultaneously 2) coalesce contiguous flush operations - * - * BLOCKING */ - - RF_ParityLog_t *log; - RF_RegionId_t regionID; - RF_MCPair_t *fwr_mcpair; - RF_DagHeader_t *fwr_dag_h; - RF_AllocListElem_t *fwr_alloclist; - RF_PhysDiskAddr_t *fwr_pda; - - fwr_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(fwr_mcpair->mutex); - - RF_ASSERT(logList); - log = logList; - while (log) { - regionID = log->regionID; - - /* create and launch a DAG to write the core log */ - if (rf_parityLogDebug) - printf("[initiating write of core log for region %d]\n", regionID); - fwr_mcpair->flag = RF_FALSE; - WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, - &fwr_alloclist, &fwr_pda); - - /* wait for the DAG to complete */ - while (!fwr_mcpair->flag) - RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex); - if (fwr_dag_h->status != rf_enable) { - RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); - RF_ASSERT(0); - } - /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(fwr_pda); - rf_FreeDAG(fwr_dag_h); - rf_FreeAllocList(fwr_alloclist); - - log = log->next; - } - RF_UNLOCK_MUTEX(fwr_mcpair->mutex); - rf_FreeMCPair(fwr_mcpair); - rf_ReleaseParityLogs(raidPtr, logList); -} - -static void -ReintegrateRegion( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_ParityLog_t * coreLog) -{ - RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; - RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h; - RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist; - RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda; - caddr_t parityBuffer, regionBuffer = NULL; - - /* Reintegrate a region (regionID). - * - * 1. acquire region and parity buffers - * 2. read log from disk - * 3. read parity from disk - * 4. apply log to parity - * 5. apply core log to parity - * 6. 
write new parity to disk - * - * BLOCKING */ - - if (rf_parityLogDebug) - printf("[reintegrating region %d]\n", regionID); - - /* initiate read of region parity */ - if (rf_parityLogDebug) - printf("[initiating read of parity for region %d]\n",regionID); - parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); - prd_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(prd_mcpair->mutex); - prd_mcpair->flag = RF_FALSE; - ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, - &prd_dag_h, &prd_alloclist, &prd_pda); - - /* if region log nonempty, initiate read */ - if (raidPtr->regionInfo[regionID].diskCount > 0) { - if (rf_parityLogDebug) - printf("[initiating read of disk log for region %d]\n", - regionID); - regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); - rrd_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(rrd_mcpair->mutex); - rrd_mcpair->flag = RF_FALSE; - ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, - &rrd_dag_h, &rrd_alloclist, &rrd_pda); - } - /* wait on read of region parity to complete */ - while (!prd_mcpair->flag) { - RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex); - } - RF_UNLOCK_MUTEX(prd_mcpair->mutex); - if (prd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to read parity from disk\n"); - /* add code to fail the parity disk */ - RF_ASSERT(0); - } - /* apply core log to parity */ - /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ - - if (raidPtr->regionInfo[regionID].diskCount > 0) { - /* wait on read of region log to complete */ - while (!rrd_mcpair->flag) - RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex); - RF_UNLOCK_MUTEX(rrd_mcpair->mutex); - if (rrd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to read region log from disk\n"); - /* add code to fail the log disk */ - RF_ASSERT(0); - } - /* apply region log to parity */ - /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ - /* release resources associated with region log */ - /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ 
- rf_FreePhysDiskAddr(rrd_pda); - rf_FreeDAG(rrd_dag_h); - rf_FreeAllocList(rrd_alloclist); - rf_FreeMCPair(rrd_mcpair); - ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); - } - /* write reintegrated parity to disk */ - if (rf_parityLogDebug) - printf("[initiating write of parity for region %d]\n", - regionID); - pwr_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(pwr_mcpair->mutex); - pwr_mcpair->flag = RF_FALSE; - WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, - &pwr_dag_h, &pwr_alloclist, &pwr_pda); - while (!pwr_mcpair->flag) - RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); - RF_UNLOCK_MUTEX(pwr_mcpair->mutex); - if (pwr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to write parity to disk\n"); - /* add code to fail the parity disk */ - RF_ASSERT(0); - } - /* release resources associated with read of old parity */ - /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(prd_pda); - rf_FreeDAG(prd_dag_h); - rf_FreeAllocList(prd_alloclist); - rf_FreeMCPair(prd_mcpair); - - /* release resources associated with write of new parity */ - ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); - /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(pwr_pda); - rf_FreeDAG(pwr_dag_h); - rf_FreeAllocList(pwr_alloclist); - rf_FreeMCPair(pwr_mcpair); - - if (rf_parityLogDebug) - printf("[finished reintegrating region %d]\n", regionID); -} - - - -static void -ReintegrateLogs( - RF_Raid_t * raidPtr, - RF_ParityLog_t * logList) -{ - RF_ParityLog_t *log, *freeLogList = NULL; - RF_ParityLogData_t *logData, *logDataList; - RF_RegionId_t regionID; - - RF_ASSERT(logList); - while (logList) { - log = logList; - logList = logList->next; - log->next = NULL; - regionID = log->regionID; - ReintegrateRegion(raidPtr, regionID, log); - log->numRecords = 0; - - /* remove all items which are blocked on reintegration of this - * region */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logData = 
rf_SearchAndDequeueParityLogData(raidPtr, regionID, - &raidPtr->parityLogDiskQueue.reintBlockHead, - &raidPtr->parityLogDiskQueue.reintBlockTail, - RF_TRUE); - logDataList = logData; - while (logData) { - logData->next = rf_SearchAndDequeueParityLogData( - raidPtr, regionID, - &raidPtr->parityLogDiskQueue.reintBlockHead, - &raidPtr->parityLogDiskQueue.reintBlockTail, - RF_TRUE); - logData = logData->next; - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* process blocked log data and clear reintInProgress flag for - * this region */ - if (logDataList) - rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); - else { - /* Enable flushing for this region. Holding both - * locks provides a synchronization barrier with - * DumpParityLogToDisk */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->regionInfo[regionID].diskCount = 0; - raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now - * enabled */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - /* if log wasn't used, attach it to the list of logs to be - * returned */ - if (log) { - log->next = freeLogList; - freeLogList = log; - } - } - if (freeLogList) - rf_ReleaseParityLogs(raidPtr, freeLogList); -} - -int -rf_ShutdownLogging(RF_Raid_t * raidPtr) -{ - /* shutdown parity logging 1) disable parity logging in all regions 2) - * reintegrate all regions */ - - RF_SectorCount_t diskCount; - RF_RegionId_t regionID; - RF_ParityLog_t *log; - - if (rf_parityLogDebug) - printf("[shutting down parity logging]\n"); - /* Since parity log maps are volatile, we must reintegrate all - * regions. 
*/ - if (rf_forceParityLogReint) { - for (regionID = 0; regionID < rf_numParityRegions; regionID++) { - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = - RF_FALSE; - log = raidPtr->regionInfo[regionID].coreLog; - raidPtr->regionInfo[regionID].coreLog = NULL; - diskCount = raidPtr->regionInfo[regionID].diskCount; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - if (diskCount > 0 || log != NULL) - ReintegrateRegion(raidPtr, regionID, log); - if (log != NULL) - rf_ReleaseParityLogs(raidPtr, log); - } - } - if (rf_parityLogDebug) { - printf("[parity logging disabled]\n"); - printf("[should be done!]\n"); - } - return (0); -} - -int -rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) -{ - RF_ParityLog_t *reintQueue, *flushQueue; - int workNeeded, done = RF_FALSE; - int s; - - /* Main program for parity logging disk thread. This routine waits - * for work to appear in either the flush or reintegration queues and - * is responsible for flushing core logs to the log disk as well as - * reintegrating parity regions. - * - * BLOCKING */ - - s = splbio(); - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* - * Inform our creator that we're running. Don't bother doing the - * mutex lock/unlock dance- we locked above, and we'll unlock - * below with nothing to do, yet. - */ - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - - /* empty the work queues */ - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - - while (!done) { - while (workNeeded) { - /* First, flush all logs in the flush queue, freeing - * buffers Second, reintegrate all regions which are - * reported as full. Third, append queued log data - * until blocked. 
- * - * Note: Incoming appends (ParityLogAppend) can block on - * either 1. empty buffer pool 2. region under - * reintegration To preserve a global FIFO ordering of - * appends, buffers are not released to the world - * until those appends blocked on buffers are removed - * from the append queue. Similarly, regions which - * are reintegrated are not opened for general use - * until the append queue has been emptied. */ - - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* empty flushQueue, using free'd log buffers to - * process bufTail */ - if (flushQueue) - FlushLogsToDisk(raidPtr, flushQueue); - - /* empty reintQueue, flushing from reintTail as we go */ - if (reintQueue) - ReintegrateLogs(raidPtr, reintQueue); - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - } - /* no work is needed at this point */ - if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { - /* shutdown parity logging 1. disable parity logging - * in all regions 2. 
reintegrate all regions */ - done = RF_TRUE; /* thread disabled, no work needed */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - rf_ShutdownLogging(raidPtr); - } - if (!done) { - /* thread enabled, no work needed, so sleep */ - if (rf_parityLogDebug) - printf("[parity logging disk manager sleeping]\n"); - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, - raidPtr->parityLogDiskQueue.mutex); - if (rf_parityLogDebug) - printf("[parity logging disk manager just woke up]\n"); - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - } - } - /* - * Announce that we're done. - */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - - splx(s); - - /* - * In the NetBSD kernel, the thread must exit; returning would - * cause the proc trampoline to attempt to return to userspace. - */ - kthread_exit(0); /* does not return */ -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.h b/sys/dev/raidframe/rf_paritylogDiskMgr.h deleted file mode 100644 index bdcc2a5..0000000 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.h +++ /dev/null @@ -1,42 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_paritylogDiskMgr.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. 
Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for parity log disk mgr code - * - */ - -#ifndef _RF__RF_PARITYLOGDISKMGR_H_ -#define _RF__RF_PARITYLOGDISKMGR_H_ - -#include <dev/raidframe/rf_types.h> - -int rf_ShutdownLogging(RF_Raid_t * raidPtr); -int rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr); - -#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */ diff --git a/sys/dev/raidframe/rf_paritylogging.c b/sys/dev/raidframe/rf_paritylogging.c deleted file mode 100644 index 2f9cf5e..0000000 --- a/sys/dev/raidframe/rf_paritylogging.c +++ /dev/null @@ -1,1076 +0,0 @@ -/* $NetBSD: rf_paritylogging.c,v 1.10 2000/02/12 16:06:27 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. 
Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - - -/* - parity logging configuration, dag selection, and mapping is implemented here - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_paritylogDiskMgr.h> -#include <dev/raidframe/rf_paritylogging.h> -#include <dev/raidframe/rf_parityloggingdags.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -typedef struct RF_ParityLoggingConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} 
RF_ParityLoggingConfigInfo_t; - -static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); -static void rf_ShutdownParityLogging(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); - -int -rf_ConfigureParityLogging( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int i, j, startdisk, rc; - RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; - RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ParityLoggingConfigInfo_t *info; - RF_ParityLog_t *l = NULL, *next; - caddr_t lHeapPtr; - - if (rf_numParityRegions <= 0) - return(EINVAL); - - /* - * We create multiple entries on the shutdown list here, since - * this configuration routine is fairly complicated in and of - * itself, and this makes backing out of a failed configuration - * much simpler. - */ - - raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; - - /* create a parity logging configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), - (RF_ParityLoggingConfigInfo_t *), - raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. 
*/ - info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), - (raidPtr->numCol), - raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - - startdisk = 0; - for (i = 0; i < (raidPtr->numCol); i++) { - for (j = 0; j < (raidPtr->numCol); j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % - (raidPtr->numCol - 1); - } - if ((--startdisk) < 0) - startdisk = raidPtr->numCol - 1 - 1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << - raidPtr->logBytesPerSector; - layoutPtr->numParityCol = 1; - layoutPtr->numParityLogCol = 1; - layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - - layoutPtr->numParityLogCol; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * - layoutPtr->sectorsPerStripeUnit; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * - layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * - layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - /* configure parity log parameters - * - * parameter comment/constraints - * ------------------------------------------- - * numParityRegions* all regions (except possibly last) - * of equal size - * totalInCoreLogCapacity* amount of memory in bytes available - * for in-core logs (default 1 MB) - * numSectorsPerLog# capacity of an in-core log in sectors - * (1 * disk track) - * numParityLogs total number of in-core logs, - * should be at least numParityRegions - * regionLogCapacity size of a region log (except possibly - * last one) in sectors - * totalLogCapacity total amount of log space in sectors - * - * where '*' denotes a user settable parameter. 
- * Note that logs are fixed to be the size of a disk track, - * value #defined in rf_paritylog.h - * - */ - - totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; - raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; - if (rf_parityLogDebug) - printf("bytes per sector %d\n", raidPtr->bytesPerSector); - - /* reduce fragmentation within a disk region by adjusting the number - * of regions in an attempt to allow an integral number of logs to fit - * into a disk region */ - fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; - if (fragmentation > 0) - for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { - if (((totalLogCapacity / (rf_numParityRegions + i)) % - raidPtr->numSectorsPerLog) < fragmentation) { - rf_numParityRegions++; - raidPtr->regionLogCapacity = totalLogCapacity / - rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % - raidPtr->numSectorsPerLog; - } - if (((totalLogCapacity / (rf_numParityRegions - i)) % - raidPtr->numSectorsPerLog) < fragmentation) { - rf_numParityRegions--; - raidPtr->regionLogCapacity = totalLogCapacity / - rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % - raidPtr->numSectorsPerLog; - } - } - /* ensure integral number of regions per log */ - raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / - raidPtr->numSectorsPerLog) * - raidPtr->numSectorsPerLog; - - raidPtr->numParityLogs = rf_totalInCoreLogCapacity / - (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); - /* to avoid deadlock, must ensure that enough logs exist for each - * region to have one simultaneously */ - if (raidPtr->numParityLogs < rf_numParityRegions) - raidPtr->numParityLogs = rf_numParityRegions; - - /* create region information structs */ - printf("Allocating %d bytes for in-core parity region info\n", - (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); - RF_Malloc(raidPtr->regionInfo, - (rf_numParityRegions * 
sizeof(RF_RegionInfo_t)), - (RF_RegionInfo_t *)); - if (raidPtr->regionInfo == NULL) - return (ENOMEM); - - /* last region may not be full capacity */ - lastRegionCapacity = raidPtr->regionLogCapacity; - while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + - lastRegionCapacity > totalLogCapacity) - lastRegionCapacity = lastRegionCapacity - - raidPtr->numSectorsPerLog; - - raidPtr->regionParityRange = raidPtr->sectorsPerDisk / - rf_numParityRegions; - maxRegionParityRange = raidPtr->regionParityRange; - -/* i can't remember why this line is in the code -wvcii 6/30/95 */ -/* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) - regionParityRange++; */ - - /* build pool of unused parity logs */ - printf("Allocating %d bytes for %d parity logs\n", - raidPtr->numParityLogs * raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector, - raidPtr->numParityLogs); - RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, - (caddr_t)); - if (raidPtr->parityLogBufferHeap == NULL) - return (ENOMEM); - lHeapPtr = raidPtr->parityLogBufferHeap; - rc = rf_mutex_init(&raidPtr->parityLogPool.mutex, "RF_PARITYLOGGING1"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - return (ENOMEM); - } - for (i = 0; i < raidPtr->numParityLogs; i++) { - if (i == 0) { - RF_Calloc(raidPtr->parityLogPool.parityLogs, 1, - sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); - if (raidPtr->parityLogPool.parityLogs == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector); - return (ENOMEM); - } - l = raidPtr->parityLogPool.parityLogs; - } else { - RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t), - (RF_ParityLog_t *)); - if (l->next == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - 
raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector); - for (l = raidPtr->parityLogPool.parityLogs; - l; - l = next) { - next = l->next; - if (l->records) - RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); - RF_Free(l, sizeof(RF_ParityLog_t)); - } - return (ENOMEM); - } - l = l->next; - } - l->bufPtr = lHeapPtr; - lHeapPtr += raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector; - RF_Malloc(l->records, (raidPtr->numSectorsPerLog * - sizeof(RF_ParityLogRecord_t)), - (RF_ParityLogRecord_t *)); - if (l->records == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector); - for (l = raidPtr->parityLogPool.parityLogs; - l; - l = next) { - next = l->next; - if (l->records) - RF_Free(l->records, - (raidPtr->numSectorsPerLog * - sizeof(RF_ParityLogRecord_t))); - RF_Free(l, sizeof(RF_ParityLog_t)); - } - return (ENOMEM); - } - } - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingPool(raidPtr); - return (rc); - } - /* build pool of region buffers */ - rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex, "RF_PARITYLOGGING3"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (ENOMEM); - } - rc = rf_cond_init(&raidPtr->regionBufferPool.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - return (ENOMEM); - } - raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * - raidPtr->bytesPerSector; - printf("regionBufferPool.bufferSize %d\n", - raidPtr->regionBufferPool.bufferSize); - - /* for now, only one region at a time may be reintegrated */ - raidPtr->regionBufferPool.totalBuffers = 1; - - 
raidPtr->regionBufferPool.availableBuffers = - raidPtr->regionBufferPool.totalBuffers; - raidPtr->regionBufferPool.availBuffersIndex = 0; - raidPtr->regionBufferPool.emptyBuffersIndex = 0; - printf("Allocating %d bytes for regionBufferPool\n", - (int) (raidPtr->regionBufferPool.totalBuffers * - sizeof(caddr_t))); - RF_Malloc(raidPtr->regionBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), - (caddr_t *)); - if (raidPtr->regionBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - rf_cond_destroy(&raidPtr->regionBufferPool.cond); - return (ENOMEM); - } - for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { - printf("Allocating %d bytes for regionBufferPool#%d\n", - (int) (raidPtr->regionBufferPool.bufferSize * - sizeof(char)), i); - RF_Malloc(raidPtr->regionBufferPool.buffers[i], - raidPtr->regionBufferPool.bufferSize * sizeof(char), - (caddr_t)); - if (raidPtr->regionBufferPool.buffers[i] == NULL) { - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - rf_cond_destroy(&raidPtr->regionBufferPool.cond); - for (j = 0; j < i; j++) { - RF_Free(raidPtr->regionBufferPool.buffers[i], - raidPtr->regionBufferPool.bufferSize * - sizeof(char)); - } - RF_Free(raidPtr->regionBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * - sizeof(caddr_t)); - return (ENOMEM); - } - printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, - (long) raidPtr->regionBufferPool.buffers[i]); - } - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingRegionBufferPool, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingRegionBufferPool(raidPtr); - return (rc); - } - /* build pool of parity buffers */ - parityBufferCapacity = maxRegionParityRange; - rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex, "RF_PARITYLOGGING3"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, 
__LINE__, rc); - return (rc); - } - rc = rf_cond_init(&raidPtr->parityBufferPool.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - return (ENOMEM); - } - raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * - raidPtr->bytesPerSector; - printf("parityBufferPool.bufferSize %d\n", - raidPtr->parityBufferPool.bufferSize); - - /* for now, only one region at a time may be reintegrated */ - raidPtr->parityBufferPool.totalBuffers = 1; - - raidPtr->parityBufferPool.availableBuffers = - raidPtr->parityBufferPool.totalBuffers; - raidPtr->parityBufferPool.availBuffersIndex = 0; - raidPtr->parityBufferPool.emptyBuffersIndex = 0; - printf("Allocating %d bytes for parityBufferPool of %d units\n", - (int) (raidPtr->parityBufferPool.totalBuffers * - sizeof(caddr_t)), - raidPtr->parityBufferPool.totalBuffers ); - RF_Malloc(raidPtr->parityBufferPool.buffers, - raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), - (caddr_t *)); - if (raidPtr->parityBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - rf_cond_destroy(&raidPtr->parityBufferPool.cond); - return (ENOMEM); - } - for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { - printf("Allocating %d bytes for parityBufferPool#%d\n", - (int) (raidPtr->parityBufferPool.bufferSize * - sizeof(char)),i); - RF_Malloc(raidPtr->parityBufferPool.buffers[i], - raidPtr->parityBufferPool.bufferSize * sizeof(char), - (caddr_t)); - if (raidPtr->parityBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - rf_cond_destroy(&raidPtr->parityBufferPool.cond); - for (j = 0; j < i; j++) { - RF_Free(raidPtr->parityBufferPool.buffers[i], - raidPtr->regionBufferPool.bufferSize * - sizeof(char)); - } - RF_Free(raidPtr->parityBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * - sizeof(caddr_t)); - return (ENOMEM); - } - 
printf("parityBufferPool.buffers[%d] = %lx\n", i, - (long) raidPtr->parityBufferPool.buffers[i]); - } - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingParityBufferPool, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingParityBufferPool(raidPtr); - return (rc); - } - /* initialize parityLogDiskQueue */ - rc = rf_create_managed_mutex(listp, - &raidPtr->parityLogDiskQueue.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (rc); - } - rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (rc); - } - raidPtr->parityLogDiskQueue.flushQueue = NULL; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - raidPtr->parityLogDiskQueue.bufHead = NULL; - raidPtr->parityLogDiskQueue.bufTail = NULL; - raidPtr->parityLogDiskQueue.reintHead = NULL; - raidPtr->parityLogDiskQueue.reintTail = NULL; - raidPtr->parityLogDiskQueue.logBlockHead = NULL; - raidPtr->parityLogDiskQueue.logBlockTail = NULL; - raidPtr->parityLogDiskQueue.reintBlockHead = NULL; - raidPtr->parityLogDiskQueue.reintBlockTail = NULL; - raidPtr->parityLogDiskQueue.freeDataList = NULL; - raidPtr->parityLogDiskQueue.freeCommonList = NULL; - - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingDiskQueue, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - for (i = 0; i < rf_numParityRegions; i++) { - rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex, "RF_PARITYLOGGING3"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * - sizeof(RF_RegionInfo_t))); - return (ENOMEM); 
- } - rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex, "RF_PARITYLOGGING4"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); - for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * - sizeof(RF_RegionInfo_t))); - return (ENOMEM); - } - raidPtr->regionInfo[i].reintInProgress = RF_FALSE; - raidPtr->regionInfo[i].regionStartAddr = - raidPtr->regionLogCapacity * i; - raidPtr->regionInfo[i].parityStartAddr = - raidPtr->regionParityRange * i; - if (i < rf_numParityRegions - 1) { - raidPtr->regionInfo[i].capacity = - raidPtr->regionLogCapacity; - raidPtr->regionInfo[i].numSectorsParity = - raidPtr->regionParityRange; - } else { - raidPtr->regionInfo[i].capacity = - lastRegionCapacity; - raidPtr->regionInfo[i].numSectorsParity = - raidPtr->sectorsPerDisk - - raidPtr->regionParityRange * i; - if (raidPtr->regionInfo[i].numSectorsParity > - maxRegionParityRange) - maxRegionParityRange = - raidPtr->regionInfo[i].numSectorsParity; - } - raidPtr->regionInfo[i].diskCount = 0; - RF_ASSERT(raidPtr->regionInfo[i].capacity + - raidPtr->regionInfo[i].regionStartAddr <= - totalLogCapacity); - RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + - raidPtr->regionInfo[i].numSectorsParity <= - raidPtr->sectorsPerDisk); - printf("Allocating %d bytes for region %d\n", - (int) (raidPtr->regionInfo[i].capacity * - sizeof(RF_DiskMap_t)), i); - RF_Malloc(raidPtr->regionInfo[i].diskMap, - (raidPtr->regionInfo[i].capacity * - sizeof(RF_DiskMap_t)), - (RF_DiskMap_t *)); - if (raidPtr->regionInfo[i].diskMap == NULL) { - rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex); - for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * - sizeof(RF_RegionInfo_t))); - return (ENOMEM); - } - raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; - 
raidPtr->regionInfo[i].coreLog = NULL; - } - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingRegionInfo, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingRegionInfo(raidPtr); - return (rc); - } - RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); - raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; - rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, - rf_ParityLoggingDiskManager, raidPtr,"rf_log"); - if (rc) { - raidPtr->parityLogDiskQueue.threadState = 0; - RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (ENOMEM); - } - /* wait for thread to start */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, - raidPtr->parityLogDiskQueue.mutex); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); - if (rc) { - RF_ERRORMSG1("Got rc=%d adding parity logging shutdown event\n", rc); - rf_ShutdownParityLogging(raidPtr); - return (rc); - } - if (rf_parityLogDebug) { - printf(" size of disk log in sectors: %d\n", - (int) totalLogCapacity); - printf(" total number of parity regions is %d\n", (int) rf_numParityRegions); - printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity); - printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation); - printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); - printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); - printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); - } - rf_EnableParityLogging(raidPtr); - - return (0); -} - -static void -FreeRegionInfo( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID) -{ - 
RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_Free(raidPtr->regionInfo[regionID].diskMap, - (raidPtr->regionInfo[regionID].capacity * - sizeof(RF_DiskMap_t))); - if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { - rf_ReleaseParityLogs(raidPtr, - raidPtr->regionInfo[regionID].coreLog); - raidPtr->regionInfo[regionID].coreLog = NULL; - } else { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); - RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex); -} - - -static void -FreeParityLogQueue( - RF_Raid_t * raidPtr, - RF_ParityLogQueue_t * queue) -{ - RF_ParityLog_t *l1, *l2; - - RF_LOCK_MUTEX(queue->mutex); - l1 = queue->parityLogs; - while (l1) { - l2 = l1; - l1 = l2->next; - RF_Free(l2->records, (raidPtr->numSectorsPerLog * - sizeof(RF_ParityLogRecord_t))); - RF_Free(l2, sizeof(RF_ParityLog_t)); - } - RF_UNLOCK_MUTEX(queue->mutex); - rf_mutex_destroy(&queue->mutex); -} - - -static void -FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) -{ - int i; - - RF_LOCK_MUTEX(queue->mutex); - if (queue->availableBuffers != queue->totalBuffers) { - printf("Attempt to free region queue which is still in use!\n"); - RF_ASSERT(0); - } - for (i = 0; i < queue->totalBuffers; i++) - RF_Free(queue->buffers[i], queue->bufferSize); - RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t)); - RF_UNLOCK_MUTEX(queue->mutex); - rf_mutex_destroy(&queue->mutex); -} - -static void -rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - RF_RegionId_t i; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingRegionInfo\n", - raidPtr->raidid); - } - /* free region information structs */ - for (i = 0; i < rf_numParityRegions; i++) - FreeRegionInfo(raidPtr, i); - RF_Free(raidPtr->regionInfo, 
(rf_numParityRegions * - sizeof(raidPtr->regionInfo))); - raidPtr->regionInfo = NULL; -} - -static void -rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid); - } - /* free contents of parityLogPool */ - FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); -} - -static void -rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingRegionBufferPool\n", - raidPtr->raidid); - } - FreeRegionBufferQueue(&raidPtr->regionBufferPool); -} - -static void -rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingParityBufferPool\n", - raidPtr->raidid); - } - FreeRegionBufferQueue(&raidPtr->parityBufferPool); -} - -static void -rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) -{ - RF_ParityLogData_t *d; - RF_CommonLogData_t *c; - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingDiskQueue\n", - raidPtr->raidid); - } - /* free disk manager stuff */ - RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); - while (raidPtr->parityLogDiskQueue.freeDataList) { - d = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = - raidPtr->parityLogDiskQueue.freeDataList->next; - RF_Free(d, sizeof(RF_ParityLogData_t)); - } - while (raidPtr->parityLogDiskQueue.freeCommonList) { - c = 
raidPtr->parityLogDiskQueue.freeCommonList; - rf_mutex_destroy(&c->mutex); - raidPtr->parityLogDiskQueue.freeCommonList = - raidPtr->parityLogDiskQueue.freeCommonList->next; - RF_Free(c, sizeof(RF_CommonLogData_t)); - } -} - -static void -rf_ShutdownParityLogging(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid); - } - /* shutdown disk thread */ - /* This has the desirable side-effect of forcing all regions to be - * reintegrated. This is necessary since all parity log maps are - * currently held in volatile memory. */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - /* - * pLogDiskThread will now terminate when queues are cleared - * now wait for it to be done - */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, - raidPtr->parityLogDiskQueue.mutex); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLogging done (thread completed)\n", raidPtr->raidid); - } -} - -int -rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) -{ - return (10); -} -/* return the region ID for a given RAID address */ -RF_RegionId_t -rf_MapRegionIDParityLogging( - RF_Raid_t * raidPtr, - RF_SectorNum_t address) -{ - RF_RegionId_t regionID; - -/* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ - regionID = address / raidPtr->regionParityRange; - if (regionID == rf_numParityRegions) { - /* last region may be larger than other regions */ - regionID--; - } - 
RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + - raidPtr->regionInfo[regionID].numSectorsParity); - RF_ASSERT(regionID < rf_numParityRegions); - return (regionID); -} - - -/* given a logical RAID sector, determine physical disk address of data */ -void -rf_MapSectorParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / - raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - /* *col = (SUID % (raidPtr->numCol - - * raidPtr->Layout.numParityLogCol)); */ - *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * - raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* given a logical RAID sector, determine physical disk address of parity */ -void -rf_MapParityParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / - raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - /* *col = - * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt - * r->numCol - raidPtr->Layout.numParityLogCol); */ - *col = raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * - raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* given a regionID and sector offset, determine the physical disk address of the parity log */ -void -rf_MapLogParityLogging( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * startSector) -{ - *row = 0; - *col = raidPtr->numCol - 1; - *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; -} - - -/* given a regionID, 
determine the physical disk address of the logged - parity for that region */ -void -rf_MapRegionParity( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * startSector, - RF_SectorCount_t * numSector) -{ - *row = 0; - *col = raidPtr->numCol - 2; - *startSector = raidPtr->regionInfo[regionID].parityStartAddr; - *numSector = raidPtr->regionInfo[regionID].numSectorsParity; -} - - -/* given a logical RAID address, determine the participating disks in - the stripe */ -void -rf_IdentifyStripeParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, - addr); - RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) - raidPtr->Layout.layoutSpecificInfo; - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; -} - - -void -rf_MapSIDToPSIDParityLogging( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} - - -/* select an algorithm for performing an access. Returns two pointers, - * one to a function that will return information about the DAG, and - * another to a function that will create the dag. - */ -void -rf_ParityLoggingDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmp, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA = NULL; - RF_RowCol_t frow, fcol; - RF_RowStatus_t rstat; - int prior_recon; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmp->numDataFailed + asmp->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! 
Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } else - if (asmp->numDataFailed + asmp->numParityFailed == 1) { - - /* if under recon & already reconstructed, redirect - * the access to the spare drive and eliminate the - * failure indication */ - failedPDA = asmp->failedPDAs[0]; - frow = failedPDA->row; - fcol = failedPDA->col; - rstat = raidPtr->status[failedPDA->row]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; - RF_SectorNum_t oo = failedPDA->startSector; - if (layoutPtr->map->flags & - RF_DISTRIBUTE_SPARE) { - /* redirect to dist spare space */ - - if (failedPDA == asmp->parityInfo) { - - /* parity has failed */ - (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmp->parityInfo->next) { /* redir 2nd component, - * if any */ - RF_PhysDiskAddr_t *p = asmp->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; - p->row = failedPDA->row; - p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: - * startSector is not - * really a RAID address */ - } - } else - if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { - RF_ASSERT(0); /* should not ever - * happen */ - } else { - - /* data has failed */ - (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - } - - } else { - /* redirect to dedicated spare space */ - - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - - /* the parity may have two distinct - * components, both of which may need - * 
to be redirected */ - if (asmp->parityInfo->next) { - if (failedPDA == asmp->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else - if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ - asmp->parityInfo->row = failedPDA->row; - asmp->parityInfo->col = failedPDA->col; - } - } - } - - RF_ASSERT(failedPDA->col != -1); - - if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, (long) failedPDA->startSector); - } - asmp->numDataFailed = asmp->numParityFailed = 0; - } - } - if (type == RF_IO_TYPE_READ) { - - if (asmp->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; - - } else { - - - /* if mirroring, always use large writes. If the access - * requires two distinct parity updates, always do a small - * write. If the stripe contains a failure but the access - * does not, do a small write. The first conditional - * (numStripeUnitsAccessed <= numDataCol/2) uses a - * less-than-or-equal rather than just a less-than because - * when G is 3 or 4, numDataCol/2 is 1, and I want - * single-stripe-unit updates to use just one disk. 
*/ - if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { - if (((asmp->numStripeUnitsAccessed <= - (layoutPtr->numDataCol / 2)) && - (layoutPtr->numDataCol != 1)) || - (asmp->parityInfo->next != NULL) || - rf_CheckStripeForFailures(raidPtr, asmp)) { - *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; - } else - if (asmp->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; - else - if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; - } -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogging.h b/sys/dev/raidframe/rf_paritylogging.h deleted file mode 100644 index 5b7dd25..0000000 --- a/sys/dev/raidframe/rf_paritylogging.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_paritylogging.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for Parity Logging */ - -#ifndef _RF__RF_PARITYLOGGING_H_ -#define _RF__RF_PARITYLOGGING_H_ - -int -rf_ConfigureParityLogging(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr); -RF_RegionId_t -rf_MapRegionIDParityLogging(RF_Raid_t * raidPtr, - RF_SectorNum_t address); -void -rf_MapSectorParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, - int remap); -void -rf_MapParityParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, - int remap); -void -rf_MapLogParityLogging(RF_Raid_t * raidPtr, RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, RF_RowCol_t * row, RF_RowCol_t * col, - RF_SectorNum_t * startSector); -void -rf_MapRegionParity(RF_Raid_t * raidPtr, RF_RegionId_t regionID, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * startSector, - RF_SectorCount_t * numSector); -void -rf_IdentifyStripeParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_ParityLoggingDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_PARITYLOGGING_H_ */ diff --git 
a/sys/dev/raidframe/rf_parityloggingdags.c b/sys/dev/raidframe/rf_parityloggingdags.c deleted file mode 100644 index 30a5892..0000000 --- a/sys/dev/raidframe/rf_parityloggingdags.c +++ /dev/null @@ -1,675 +0,0 @@ -/* $NetBSD: rf_parityloggingdags.c,v 1.4 2000/01/07 03:41:04 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -/* - DAGs specific to parity logging are created here - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> - -#include <dev/raidframe/rf_parityloggingdags.h> - -/****************************************************************************** - * - * creates a DAG to perform a large-write operation: - * - * / Rod \ / Wnd \ - * H -- NIL- Rod - NIL - Wnd ------ NIL - T - * \ Rod / \ Xor - Lpo / - * - * The writes are not done until the reads complete because if they were done in - * parallel, a failure on one of the reads could leave the parity in an inconsistent - * state, so that the retry with a new DAG would produce erroneous parity. - * - * Note: this DAG has the nasty property that none of the buffers allocated for reading - * old data can be freed until the XOR node fires. Need to fix this. - * - * The last two arguments are the number of faults tolerated, and function for the - * redundancy calculation. 
The undo for the redundancy calc is assumed to be null - * - *****************************************************************************/ - -void -rf_CommonCreateParityLoggingLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *)) -{ - RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode, - *lpoNode, *blockNode, *unblockNode, *termNode; - int nWndNodes, nRodNodes, i; - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_AccessStripeMapHeader_t *new_asm_h[2]; - int nodeNum, asmNum; - RF_ReconUnitNum_t which_ru; - char *sosBuffer, *eosBuffer; - RF_PhysDiskAddr_t *pda; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating parity-logging large-write DAG]\n"); - RF_ASSERT(nfaults == 1);/* this arch only single fault tolerant */ - dag_h->creator = "ParityLoggingLargeWriteDAG"; - - /* alloc the Wnd nodes, the xor node, and the Lpo node */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; - i += nWndNodes; - xorNode = &nodes[i]; - i += 1; - lpoNode = &nodes[i]; - i += 1; - blockNode = &nodes[i]; - i += 1; - syncNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - - dag_h->numCommitNodes = nWndNodes + 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - - /* begin node initialization */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes 
+ 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - nodeNum++; - pda = pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h, "Xr ", allocList); - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i = 
0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ - } - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get - * at RAID information */ - - /* look for an Rod node that reads a complete SU. If none, alloc a - * buffer to receive the parity info. Note that we can't use a new - * data buffer because it will not have gotten written when the xor - * occurs. */ - for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - if (i == nRodNodes) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - } else { - xorNode->results[0] = rodNodes[i].params[1].p; - } - - /* initialize the Lpo node */ - rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList); - - lpoNode->params[0].p = asmap->parityInfo; - lpoNode->params[1].p = xorNode->results[0]; - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ - - /* connect nodes to form graph */ - - /* connect dag header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - } - - /* connect the block node to the sync node */ - /* necessary if nRodNodes == 0 */ - RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1); - blockNode->succedents[nRodNodes] = syncNode; - 
syncNode->antecedents[0] = blockNode; - syncNode->antType[0] = rf_control; - - /* connect the Rod nodes to the syncNode */ - for (i = 0; i < nRodNodes; i++) { - rodNodes[i].succedents[0] = syncNode; - syncNode->antecedents[1 + i] = &rodNodes[i]; - syncNode->antType[1 + i] = rf_control; - } - - /* connect the sync node to the xor node */ - RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1); - RF_ASSERT(xorNode->numAntecedents == 1); - syncNode->succedents[0] = xorNode; - xorNode->antecedents[0] = syncNode; - xorNode->antType[0] = rf_trueData; /* carry forward from sync */ - - /* connect the sync node to the Wnd nodes */ - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - syncNode->succedents[1 + i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = syncNode; - wndNodes[i].antType[0] = rf_control; - } - - /* connect the xor node to the Lpo node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(lpoNode->numAntecedents == 1); - xorNode->succedents[0] = lpoNode; - lpoNode->antecedents[0] = xorNode; - lpoNode->antType[0] = rf_trueData; - - /* connect the Wnd nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNodes[i]; - unblockNode->antType[i] = rf_control; - } - - /* connect the Lpo node to the unblock node */ - RF_ASSERT(lpoNode->numSuccedents == 1); - lpoNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes] = lpoNode; - unblockNode->antType[nWndNodes] = rf_control; - - /* connect unblock node to terminator */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} - - - - 
-/****************************************************************************** - * - * creates a DAG to perform a small-write operation (either raid 5 or pq), which is as follows: - * - * Header - * | - * Block - * / | ... \ \ - * / | \ \ - * Rod Rod Rod Rop - * | \ /| \ / | \/ | - * | | | /\ | - * Wnd Wnd Wnd X - * | \ / | - * | \ / | - * \ \ / Lpo - * \ \ / / - * +-> Unblock <-+ - * | - * T - * - * - * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity. - * When the access spans a stripe unit boundary and is less than one SU in size, there will - * be two Rop -- X -- Wnp branches. I call this the "double-XOR" case. - * The second output from each Rod node goes to the X node. In the double-XOR - * case, there are exactly 2 Rod nodes, and each sends one output to one X node. - * There is one Rod -- Wnd -- T branch for each stripe unit being updated. - * - * The block and unblock nodes are unused. See comment above CreateFaultFreeReadDAG. - * - * Note: this DAG ignores all the optimizations related to making the RMWs atomic. - * it also has the nasty property that none of the buffers allocated for reading - * old data & parity can be freed until the XOR node fires. Need to fix this. - * - * A null qfuncs indicates single fault tolerant - *****************************************************************************/ - -void -rf_CommonCreateParityLoggingSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes; - RF_DagNode_t *readDataNodes, *readParityNodes; - RF_DagNode_t *writeDataNodes, *lpuNodes; - RF_DagNode_t *unlockDataNodes = NULL, *termNode; - RF_PhysDiskAddr_t *pda = asmap->physInfo; - int numDataNodes = asmap->numStripeUnitsAccessed; - int numParityNodes = (asmap->parityInfo->next) ? 
2 : 1; - int i, j, nNodes, totalNumNodes; - RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node); - int (*qfunc) (RF_DagNode_t * node); - char *name, *qname; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - long nfaults = qfuncs ? 2 : 1; - int lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - if (rf_dagDebug) - printf("[Creating parity-logging small-write DAG]\n"); - RF_ASSERT(numDataNodes > 0); - RF_ASSERT(nfaults == 1); - dag_h->creator = "ParityLoggingSmallWriteDAG"; - - /* DAG creation occurs in three steps: 1. count the number of nodes in - * the DAG 2. create the nodes 3. initialize the nodes 4. connect the - * nodes */ - - /* Step 1. compute number of nodes in the graph */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node a read and Lpu for each - * parity unit a block and unblock node (2) a terminator node if - * atomic RMW an unlock node for each data unit, redundancy unit */ - totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3; - if (lu_flag) - totalNumNodes += numDataNodes; - - nNodes = numDataNodes + numParityNodes; - - dag_h->numCommitNodes = numDataNodes + numParityNodes; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - readDataNodes = &nodes[i]; - i += numDataNodes; - readParityNodes = &nodes[i]; - i += numParityNodes; - writeDataNodes = &nodes[i]; - i += numDataNodes; - lpuNodes = &nodes[i]; - i += numParityNodes; - xorNodes = &nodes[i]; - i += numParityNodes; - termNode = &nodes[i]; - i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; - i += numDataNodes; - } - RF_ASSERT(i == totalNumNodes); - - /* Step 3. 
initialize the nodes */ - /* initialize block node (Nil) */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize unblock node (Nil) */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList); - - /* initialize terminatory node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList); - RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * data */ - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - pda = pda->next; - readDataNodes[i].propList[0] = NULL; - readDataNodes[i].propList[1] = NULL; - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; - i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * parity */ - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - readParityNodes[i].propList[0] = NULL; - pda = pda->next; - } - - /* initialize nodes which write new data (Wnd) */ - pda = 
asmap->physInfo; - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new - * data to be written */ - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - - - /* initialize nodes which compute new parity */ - /* we use the simple XOR func in the double-XOR case, and when we're - * accessing only a portion of one stripe unit. the distinction - * between the two is that the regular XOR func assumes that the - * targbuf is a full SU in size, and examines the pda associated with - * the buffer to decide where within the buffer to XOR the data, - * whereas the simple XOR func just XORs the data into the start of - * the buffer. 
*/ - if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } - } else { - func = pfuncs->regular; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->RegularName; - if (qfuncs) { - qfunc = qfuncs->regular; - qname = qfuncs->RegularName; - } - } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} - * nodes, and raidPtr */ - if (numParityNodes == 2) { /* double-xor case */ - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for - * xor */ - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as - * target buf */ - } - } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = 
writeDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - } - - /* initialize the log node(s) */ - pda = asmap->parityInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda); - rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList); - lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity */ - lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to - * parity */ - pda = pda->next; - } - - - /* Step 4. connect the nodes */ - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes)); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0] = blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect read old data nodes to write new data nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes); - for (j = 0; j < numDataNodes; j++) { - RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[j] = 
&writeDataNodes[j]; - writeDataNodes[j].antecedents[i] = &readDataNodes[i]; - if (i == j) - writeDataNodes[j].antType[i] = rf_antiData; - else - writeDataNodes[j].antType[i] = rf_control; - } - } - - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - - /* connect read old parity nodes to write new data nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes); - for (j = 0; j < numDataNodes; j++) { - readParityNodes[i].succedents[j] = &writeDataNodes[j]; - writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - writeDataNodes[j].antType[numDataNodes + i] = rf_control; - } - } - - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) - for (j = 0; j < numParityNodes; j++) { - readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - - /* connect xor nodes to write new parity nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(xorNodes[i].numSuccedents == 1); - RF_ASSERT(lpuNodes[i].numAntecedents == 1); - xorNodes[i].succedents[0] = &lpuNodes[i]; - lpuNodes[i].antecedents[0] = &xorNodes[i]; - lpuNodes[i].antType[0] = rf_trueData; - } - - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; 
- - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - unlockDataNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &unlockDataNodes[i]; - unblockNode->antType[i] = rf_control; - } else { - /* connect write new data nodes to unblock node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &writeDataNodes[i]; - unblockNode->antType[i] = rf_control; - } - } - - /* connect write new parity nodes to unblock node */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(lpuNodes[i].numSuccedents == 1); - lpuNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i]; - unblockNode->antType[numDataNodes + i] = rf_control; - } - - /* connect unblock node to terminator */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} - - -void -rf_CreateParityLoggingSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - dag_h->creator = "ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL); -} - - -void -rf_CreateParityLoggingLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *)) -{ - dag_h->creator = 
"ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc); -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_parityloggingdags.h b/sys/dev/raidframe/rf_parityloggingdags.h deleted file mode 100644 index dc0fc9b..0000000 --- a/sys/dev/raidframe/rf_parityloggingdags.h +++ /dev/null @@ -1,59 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_parityloggingdags.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/**************************************************************************** - * * - * rf_parityloggingdags.h -- header file for parity logging dags * - * * - ****************************************************************************/ - -#ifndef _RF__RF_PARITYLOGGINGDAGS_H_ -#define _RF__RF_PARITYLOGGINGDAGS_H_ - -/* routines that create DAGs */ -void -rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *)); - void rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - - void rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *)); - void rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - -#endif /* !_RF__RF_PARITYLOGGINGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_parityscan.c b/sys/dev/raidframe/rf_parityscan.c deleted file mode 100644 index 34834cb..0000000 --- a/sys/dev/raidframe/rf_parityscan.c +++ /dev/null @@ -1,445 +0,0 @@ -/* $NetBSD: rf_parityscan.c,v 1.9 2000/05/28 03:00:31 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * rf_parityscan.c -- misc utilities related to parity verification - * - *****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_kintf.h> - -/***************************************************************************************** - * - * walk through the entire arry and write new parity. - * This works by creating two DAGs, one to read a stripe of data and one to - * write new parity. The first is executed, the data is xored together, and - * then the second is executed. 
To avoid constantly building and tearing down - * the DAGs, we create them a priori and fill them in with the mapping - * information as we go along. - * - * there should never be more than one thread running this. - * - ****************************************************************************************/ - -int -rf_RewriteParity(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_AccessStripeMapHeader_t *asm_h; - int ret_val; - int rc; - RF_PhysDiskAddr_t pda; - RF_SectorNum_t i; - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* There isn't any parity. Call it "okay." */ - return (RF_PARITY_OKAY); - } - if (raidPtr->status[0] != rf_rs_optimal) { - /* - * We're in degraded mode. Don't try to verify parity now! - * XXX: this should be a "we don't want to", not a - * "we can't" error. - */ - return (RF_PARITY_COULD_NOT_VERIFY); - } - - ret_val = 0; - - pda.startSector = 0; - pda.numSector = raidPtr->Layout.sectorsPerStripeUnit; - rc = RF_PARITY_OKAY; - - for (i = 0; i < raidPtr->totalSectors && - rc <= RF_PARITY_CORRECTED; - i += layoutPtr->dataSectorsPerStripe) { - if (raidPtr->waitShutdown) { - /* Someone is pulling the plug on this set... 
- abort the re-write */ - return (1); - } - asm_h = rf_MapAccess(raidPtr, i, - layoutPtr->dataSectorsPerStripe, - NULL, RF_DONT_REMAP); - raidPtr->parity_rewrite_stripes_done = - i / layoutPtr->dataSectorsPerStripe ; - rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); - - switch (rc) { - case RF_PARITY_OKAY: - case RF_PARITY_CORRECTED: - break; - case RF_PARITY_BAD: - printf("Parity bad during correction\n"); - ret_val = 1; - break; - case RF_PARITY_COULD_NOT_CORRECT: - printf("Could not correct bad parity\n"); - ret_val = 1; - break; - case RF_PARITY_COULD_NOT_VERIFY: - printf("Could not verify parity\n"); - ret_val = 1; - break; - default: - printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); - ret_val = 1; - } - rf_FreeAccessStripeMap(asm_h); - } - return (ret_val); -} -/***************************************************************************************** - * - * verify that the parity in a particular stripe is correct. - * we validate only the range of parity defined by parityPDA, since - * this is all we have locked. The way we do this is to create an asm - * that maps the whole stripe and then range-restrict it to the parity - * region defined by the parityPDA. - * - ****************************************************************************************/ -int -rf_VerifyParity(raidPtr, aasm, correct_it, flags) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *aasm; - int correct_it; - RF_RaidAccessFlags_t flags; -{ - RF_PhysDiskAddr_t *parityPDA; - RF_AccessStripeMap_t *doasm; - RF_LayoutSW_t *lp; - int lrc, rc; - - lp = raidPtr->Layout.map; - if (lp->faultsTolerated == 0) { - /* - * There isn't any parity. Call it "okay." 
- */ - return (RF_PARITY_OKAY); - } - rc = RF_PARITY_OKAY; - if (lp->VerifyParity) { - for (doasm = aasm; doasm; doasm = doasm->next) { - for (parityPDA = doasm->parityInfo; parityPDA; - parityPDA = parityPDA->next) { - lrc = lp->VerifyParity(raidPtr, - doasm->raidAddress, - parityPDA, - correct_it, flags); - if (lrc > rc) { - /* see rf_parityscan.h for why this - * works */ - rc = lrc; - } - } - } - } else { - rc = RF_PARITY_COULD_NOT_VERIFY; - } - return (rc); -} - -int -rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, - raidAddr); - RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ - RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; - RF_AccessStripeMapHeader_t *asm_h; - RF_AccessStripeMap_t *asmap; - RF_AllocListElem_t *alloclist; - RF_PhysDiskAddr_t *pda; - char *pbuf, *buf, *end_p, *p; - int i, retcode; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, - raidAddr, - &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - RF_AccTraceEntry_t tracerec; - RF_MCPair_t *mcpair; - - retcode = RF_PARITY_OKAY; - - mcpair = rf_AllocMCPair(); - rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ - end_p = buf + bytesPerStripe; - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rod", alloclist, 
flags, RF_IO_NORMAL_PRIORITY); - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); - asmap = asm_h->stripeMap; - - for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) - goto out; /* no way to verify parity if disk is - * dead. return w/ good status */ - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - RF_ASSERT(!asmap->parityInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); - RF_ASSERT(asmap->parityInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) - goto out; - blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; - - /* fire off the DAG */ - bzero((char *) &tracerec, sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug) { - printf("Parity verify read dag:\n"); - rf_PrintDAGList(rd_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); - retcode = RF_PARITY_COULD_NOT_VERIFY; - goto out; - } - for (p = buf; p < end_p; p += numbytes) { - rf_bxor(p, pbuf, numbytes, NULL); - } - for (i = 0; i < numbytes; i++) { -#if 0 - if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) { - printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + 
i]); - } -#endif - if (pbuf[i] != buf[bytesPerStripe + i]) { - if (!correct_it) - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); - retcode = RF_PARITY_BAD; - break; - } - } - - if (retcode && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; - wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->parityInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); - retcode = RF_PARITY_COULD_NOT_CORRECT; - } - rf_FreeDAG(wr_dag_h); - if (retcode == RF_PARITY_BAD) - retcode = RF_PARITY_CORRECTED; - } -out: - rf_FreeAccessStripeMap(asm_h); - rf_FreeAllocList(alloclist); - rf_FreeDAG(rd_dag_h); - rf_FreeMCPair(mcpair); - return (retcode); -} - -int -rf_TryToRedirectPDA(raidPtr, pda, parity) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - int parity; -{ - if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) { - if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) { - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_RowCol_t or = pda->row, oc = pda->col; - RF_SectorNum_t os = pda->startSector; - if 
(parity) { - (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - if (rf_verifyParityDebug) - printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n", - or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); - } else { - (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - if (rf_verifyParityDebug) - printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n", - or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); - } - } else { - RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow; - RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol; - pda->row = spRow; - pda->col = spCol; - } - } - } - if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status)) - return (1); - return (0); -} -/***************************************************************************************** - * - * currently a stub. - * - * takes as input an ASM describing a write operation and containing one failure, and - * verifies that the parity was correctly updated to reflect the write. - * - * if it's a data unit that's failed, we read the other data units in the stripe and - * the parity unit, XOR them together, and verify that we get the data intended for - * the failed disk. Since it's easy, we also validate that the right data got written - * to the surviving data disks. - * - * If it's the parity that failed, there's really no validation we can do except the - * above verification that the right data got written to all disks. This is because - * the new data intended for the failed disk is supplied in the ASM, but this is of - * course not the case for the new parity. 
- * - ****************************************************************************************/ -int -rf_VerifyDegrModeWrite(raidPtr, asmh) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asmh; -{ - return (0); -} -/* creates a simple DAG with a header, a block-recon node at level 1, - * nNodes nodes at level 2, an unblock-recon node at level 3, and - * a terminator node at level 4. The stripe address field in - * the block and unblock nodes are not touched, nor are the pda - * fields in the second-level nodes, so they must be filled in later. - * - * commit point is established at unblock node - this means that any - * failure during dag execution causes the dag to fail - */ -RF_DagHeader_t * -rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority) - RF_Raid_t *raidPtr; - int nNodes; - int bytesPerSU; - char *databuf; - int (*doFunc) (RF_DagNode_t * node); - int (*undoFunc) (RF_DagNode_t * node); - char *name; /* node names at the second level */ - RF_AllocListElem_t *alloclist; - RF_RaidAccessFlags_t flags; - int priority; -{ - RF_DagHeader_t *dag_h; - RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode; - int i; - - /* create the nodes, the block & unblock nodes, and the terminator - * node */ - RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist); - blockNode = &nodes[nNodes]; - unblockNode = blockNode + 1; - termNode = unblockNode + 1; - - dag_h = rf_AllocDAGHeader(); - dag_h->raidPtr = (void *) raidPtr; - dag_h->allocList = NULL;/* we won't use this alloc list */ - dag_h->status = rf_enable; - dag_h->numSuccedents = 1; - dag_h->creator = "SimpleDAG"; - - /* this dag can not commit until the unblock node is reached errors - * prior to the commit point imply the dag has failed */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - - dag_h->succedents[0] = blockNode; - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 
0, dag_h, "Nil", alloclist); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); - unblockNode->succedents[0] = termNode; - for (i = 0; i < nNodes; i++) { - blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i]; - unblockNode->antType[i] = rf_control; - rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); - nodes[i].succedents[0] = unblockNode; - nodes[i].antecedents[0] = blockNode; - nodes[i].antType[0] = rf_control; - nodes[i].params[1].p = (databuf + (i * bytesPerSU)); - } - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - return (dag_h); -} diff --git a/sys/dev/raidframe/rf_parityscan.h b/sys/dev/raidframe/rf_parityscan.h deleted file mode 100644 index babca41..0000000 --- a/sys/dev/raidframe/rf_parityscan.h +++ /dev/null @@ -1,67 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_parityscan.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_PARITYSCAN_H_ -#define _RF__RF_PARITYSCAN_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> - -int rf_RewriteParity(RF_Raid_t * raidPtr); -int -rf_VerifyParityBasic(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int -rf_VerifyParity(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * stripeMap, - int correct_it, RF_RaidAccessFlags_t flags); -int rf_TryToRedirectPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, int parity); -int rf_VerifyDegrModeWrite(RF_Raid_t * raidPtr, RF_AccessStripeMapHeader_t * asmh); -RF_DagHeader_t * -rf_MakeSimpleDAG(RF_Raid_t * raidPtr, int nNodes, - int bytesPerSU, char *databuf, - int (*doFunc) (RF_DagNode_t *), - int (*undoFunc) (RF_DagNode_t *), - char *name, RF_AllocListElem_t * alloclist, - RF_RaidAccessFlags_t flags, int priority); - -#define RF_DO_CORRECT_PARITY 1 -#define RF_DONT_CORRECT_PARITY 0 - -/* - * Return vals for VerifyParity operation - * - * Ordering is important here. - */ -#define RF_PARITY_OKAY 0 /* or no parity information */ -#define RF_PARITY_CORRECTED 1 -#define RF_PARITY_BAD 2 -#define RF_PARITY_COULD_NOT_CORRECT 3 -#define RF_PARITY_COULD_NOT_VERIFY 4 - -#endif /* !_RF__RF_PARITYSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_pq.c b/sys/dev/raidframe/rf_pq.c deleted file mode 100644 index 98b53e8..0000000 --- a/sys/dev/raidframe/rf_pq.c +++ /dev/null @@ -1,928 +0,0 @@ -/* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. 
- * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Code for RAID level 6 (P + Q) disk array architecture. 
- */ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_pqdeg.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_pq.h> - -RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"}; -RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"}; - -int -rf_RegularONPFunc(node) - RF_DagNode_t *node; -{ - return (rf_RegularXorFunc(node)); -} -/* - same as simpleONQ func, but the coefficient is always 1 -*/ - -int -rf_SimpleONPFunc(node) - RF_DagNode_t *node; -{ - return (rf_SimpleXorFunc(node)); -} - -int -rf_RecoveryPFunc(node) - RF_DagNode_t *node; -{ - return (rf_RecoveryXorFunc(node)); -} - -int -rf_RegularPFunc(node) - RF_DagNode_t *node; -{ - return (rf_RegularXorFunc(node)); -} -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -static void -QDelta(char *dest, char *obuf, char *nbuf, unsigned length, - unsigned char coeff); -static void -rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, - unsigned length, unsigned coeff); - -RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"}; -RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"}; -RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"}; - -void -rf_PQDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = 
&(raidPtr->Layout); - unsigned ndfail = asmap->numDataFailed; - unsigned npfail = asmap->numParityFailed; - unsigned ntfail = npfail + ndfail; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - if (ntfail > 2) { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } - /* ok, we can do this I/O */ - if (type == RF_IO_TYPE_READ) { - switch (ndfail) { - case 0: - /* fault free read */ - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ - break; - case 1: - /* lost a single data unit */ - /* two cases: (1) parity is not lost. do a normal raid - * 5 reconstruct read. (2) parity is lost. do a - * reconstruct read using "q". */ - if (ntfail == 2) { /* also lost redundancy */ - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG; - } else { - /* P and Q are ok. But is there a failure in - * some unaccessed data unit? 
*/ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG; - } - break; - case 2: - /* lost two data units */ - /* *infoFunc = PQOneTwo; */ - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; - break; - } - return; - } - /* a write */ - switch (ntfail) { - case 0: /* fault free */ - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - - *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG; - } else { - *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG; - } - break; - - case 1: /* single disk fault */ - if (npfail == 1) { - RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like - * normal mode raid5 - * write. */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG; - } else {/* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG; - } - } else { /* data missing. Do a P reconstruct write if - * only a single data unit is lost in the - * stripe, otherwise a PQ reconstruct write. 
*/ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG; - } - break; - - case 2: /* two disk faults */ - switch (npfail) { - case 2: /* both p and q dead */ - *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG; - break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) - *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG; - break; - case 0: /* double data loss */ - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; - break; - } - break; - - default: /* more than 2 disk faults */ - *createFunc = NULL; - RF_PANIC(); - } - return; -} -/* - Used as a stop gap info function -*/ -#if 0 -static void -PQOne(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; -{ - *nSucc = *nAnte = 1; -} - -static void -PQOneTwo(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; -{ - *nSucc = 1; - *nAnte = 2; -} -#endif - -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, - rf_RegularPQFunc, RF_FALSE); -} - -int -rf_RegularONQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf, *qpbuf; - char *obuf, *nbuf; - RF_PhysDiskAddr_t *old, *new; - unsigned long coeff; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - - RF_ETIMER_START(timer); - - d = (np - 3) / 4; - RF_ASSERT(4 * d + 3 
== np); - qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; - nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; - RF_ASSERT(new->numSector == old->numSector); - RF_ASSERT(new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient - * to use for the multiply. */ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); - QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ - return (0); -} -/* - See the SimpleXORFunc for the difference between a simple and regular func. - These Q functions should be used for - - new q = Q(data,old data,old q) - - style updates and not for - - q = ( new data, new data, .... ) - - computations. - - The simple q takes 2(2d+1)+1 params, where d is the number - of stripes written. The order of params is - old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d - [2d] old q pda_0, old q buffer - [2d_2] new data pda_0, new data buffer_0, ... 
new data pda_d, new data buffer_d - raidPtr -*/ - -int -rf_SimpleONQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf; - char *obuf, *nbuf; - RF_PhysDiskAddr_t *old, *new; - unsigned long coeff; - - RF_ETIMER_START(timer); - - d = (np - 3) / 4; - RF_ASSERT(4 * d + 3 == np); - qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; - nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; - RF_ASSERT(new->numSector == old->numSector); - RF_ASSERT(new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient - * to use for the multiply. */ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ - return (0); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); -} - -static void RegularQSubr(RF_DagNode_t *node, char *qbuf); - -static void -RegularQSubr(node, qbuf) - RF_DagNode_t *node; - char *qbuf; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - 
char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - - RF_ETIMER_START(timer); - - d = (np - 1) / 2; - RF_ASSERT(2 * d + 1 == np); - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of - * the stripe. so shift by their sector offset within the - * stripe unit */ - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} -/* - used in degraded writes. -*/ - -static void DegrQSubr(RF_DagNode_t *node); - -static void -DegrQSubr(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf = node->results[1]; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - unsigned fail_start; - int j; - - old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; - fail_start = old->startSector % secPerSU; - - RF_ETIMER_START(timer); - - d = (np - 2) / 2; - RF_ASSERT(2 * d + 2 == np); - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of 
- * the stripe. so shift by their sector offset within the - * stripe unit */ - j = old->startSector % secPerSU; - RF_ASSERT(j >= fail_start); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} -/* - Called by large write code to compute the new parity and the new q. - - structure of the params: - - pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol - raidPtr - - for a total of 2d+1 arguments. - The result buffers results[0], results[1] are the buffers for the p and q, - respectively. - - We compute Q first, then compute P. The P calculation may try to reuse - one of the input buffers for its output, so if we computed P first, we would - corrupt the input for the q calculation. -*/ - -int -rf_RegularPQFunc(node) - RF_DagNode_t *node; -{ - RegularQSubr(node, node->results[1]); - return (rf_RegularXorFunc(node)); /* does the wakeup */ -} - -int -rf_RegularQFunc(node) - RF_DagNode_t *node; -{ - /* Almost ... adjust Qsubr args */ - RegularQSubr(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ - return (0); -} -/* - Called by singly degraded write code to compute the new parity and the new q. - - structure of the params: - - pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d - failedPDA raidPtr - - for a total of 2d+2 arguments. - The result buffers results[0], results[1] are the buffers for the parity and q, - respectively. - - We compute Q first, then compute parity. The parity calculation may try to reuse - one of the input buffers for its output, so if we computed parity first, we would - corrupt the input for the q calculation. - - We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. 
-*/ - -void -rf_Degraded_100_PQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - - RF_ASSERT(np >= 2); - DegrQSubr(node); - rf_RecoveryXorFunc(node); -} - - -/* - The two below are used when reading a stripe with a single lost data unit. - The parameters are - - pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr - - and results[0] contains the data buffer. Which is originally zero-filled. - -*/ - -/* this Q func is used by the degraded-mode dag functions to recover lost data. - * the second-to-last parameter is the PDA for the failed portion of the access. - * the code here looks at this PDA and assumes that the xor target buffer is - * equal in size to the number of sectors in the failed PDA. It then uses - * the other PDAs in the parameter list to determine where within the target - * buffer the corresponding data should be xored. - * - * Recall the basic equation is - * - * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 - * - * so to recover data_j we need - * - * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256 - * - * So the coefficient for each buffer is (255 - data_col), and j should be initialized by - * copying Q into it. 
Then we need to do a table lookup to convert to solve - * data_j /= J - * - * - */ -int -rf_RecoveryQFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - int i; - RF_PhysDiskAddr_t *pda; - RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - unsigned long coeff; - - RF_ETIMER_START(timer); - /* start by copying Q into the buffer */ - bcopy(node->params[node->numParams - 3].p, node->results[0], - rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); - for (i = 0; i < node->numParams - 4; i += 2) { - RF_ASSERT(node->params[i + 1].p != node->results[0]); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i + 1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); - } - /* Do the nasty inversion now */ - coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol); - rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); - return (0); -} - -int -rf_RecoveryPQFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) 
node->params[node->numParams - 1].p; - printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid); - return (1); -} -/* - Degraded write Q subroutine. - Used when P is dead. - Large-write style Q computation. - Parameters - - (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. - - We ignore failedPDA. - - This is a "simple style" recovery func. -*/ - -void -rf_PQ_DegradedWriteQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf = node->results[0]; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - int fail_start, j; - - old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; - fail_start = old->startSector % secPerSU; - - RF_ETIMER_START(timer); - - d = (np - 2) / 2; - RF_ASSERT(2 * d + 2 == np); - - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - j = old->startSector % secPerSU; - RF_ASSERT(j >= fail_start); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); -} - - - - -/* Q computations */ - -/* - coeff - colummn; - - compute dest ^= qfor[28-coeff][rn[coeff+1] a] - - on 5-bit basis; - length in bytes; -*/ - -void -rf_IncQ(dest, buf, length, coeff) - unsigned long *dest; - unsigned long *buf; - unsigned length; - unsigned coeff; -{ - unsigned long a, d, new; 
- unsigned long a1, a2; - unsigned int *q = &(rf_qfor[28 - coeff][0]); - unsigned r = rf_rn[coeff + 1]; - -#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) -#define INSERT(a,i) (a << (5L*i)) - - length /= 8; - /* 13 5 bit quants in a 64 bit word */ - while (length) { - a = *buf++; - d = *dest; - a1 = EXTRACT(a, 0) ^ r; - a2 = EXTRACT(a, 1) ^ r; - new = INSERT(a2, 1) | a1; - a1 = EXTRACT(a, 2) ^ r; - a2 = EXTRACT(a, 3) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 2) | INSERT(a2, 3); - a1 = EXTRACT(a, 4) ^ r; - a2 = EXTRACT(a, 5) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 4) | INSERT(a2, 5); - a1 = EXTRACT(a, 5) ^ r; - a2 = EXTRACT(a, 6) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 5) | INSERT(a2, 6); -#if RF_LONGSHIFT > 2 - a1 = EXTRACT(a, 7) ^ r; - a2 = EXTRACT(a, 8) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 7) | INSERT(a2, 8); - a1 = EXTRACT(a, 9) ^ r; - a2 = EXTRACT(a, 10) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 9) | INSERT(a2, 10); - a1 = EXTRACT(a, 11) ^ r; - a2 = EXTRACT(a, 12) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 11) | INSERT(a2, 12); -#endif /* RF_LONGSHIFT > 2 */ - d ^= new; - *dest++ = d; - length--; - } -} -/* - compute - - dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] - - on a five bit basis. - optimization: compute old ^ new on 64 bit basis. - - length in bytes. -*/ - -static void -QDelta( - char *dest, - char *obuf, - char *nbuf, - unsigned length, - unsigned char coeff) -{ - unsigned long a, d, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[28 - coeff][0]); - unsigned int r = rf_rn[coeff + 1]; - - r = a1 = a2 = new = d = a = 0; /* XXX for now... 
*/ - q = NULL; /* XXX for now */ - -#ifdef _KERNEL - /* PQ in kernel currently not supported because the encoding/decoding - * table is not present */ - bzero(dest, length); -#else /* KERNEL */ - /* this code probably doesn't work and should be rewritten -wvcii */ - /* 13 5 bit quants in a 64 bit word */ - length /= 8; - while (length) { - a = *obuf++; /* XXX need to reorg to avoid cache conflicts */ - a ^= *nbuf++; - d = *dest; - a1 = EXTRACT(a, 0) ^ r; - a2 = EXTRACT(a, 1) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = INSERT(a2, 1) | a1; - a1 = EXTRACT(a, 2) ^ r; - a2 = EXTRACT(a, 3) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 2) | INSERT(a2, 3); - a1 = EXTRACT(a, 4) ^ r; - a2 = EXTRACT(a, 5) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 4) | INSERT(a2, 5); - a1 = EXTRACT(a, 5) ^ r; - a2 = EXTRACT(a, 6) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 5) | INSERT(a2, 6); -#if RF_LONGSHIFT > 2 - a1 = EXTRACT(a, 7) ^ r; - a2 = EXTRACT(a, 8) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 7) | INSERT(a2, 8); - a1 = EXTRACT(a, 9) ^ r; - a2 = EXTRACT(a, 10) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 9) | INSERT(a2, 10); - a1 = EXTRACT(a, 11) ^ r; - a2 = EXTRACT(a, 12) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 11) | INSERT(a2, 12); -#endif /* RF_LONGSHIFT > 2 */ - d ^= new; - *dest++ = d; - length--; - } -#endif /* _KERNEL */ -} -/* - recover columns a and b from the given p and q into - bufs abuf and bbuf. All bufs are word aligned. - Length is in bytes. -*/ - - -/* - * XXX - * - * Everything about this seems wrong. 
- */ -void -rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) - unsigned long *pbuf; - unsigned long *qbuf; - unsigned long *abuf; - unsigned long *bbuf; - unsigned length; - unsigned coeff_a; - unsigned coeff_b; -{ - unsigned long p, q, a, a0, a1; - int col = (29 * coeff_a) + coeff_b; - unsigned char *q0 = &(rf_qinv[col][0]); - - length /= 8; - while (length) { - p = *pbuf++; - q = *qbuf++; - a0 = EXTRACT(p, 0); - a1 = EXTRACT(q, 0); - a = q0[a0 << 5 | a1]; -#define MF(i) \ - a0 = EXTRACT(p,i); \ - a1 = EXTRACT(q,i); \ - a = a | INSERT(q0[a0<<5 | a1],i) - - MF(1); - MF(2); - MF(3); - MF(4); - MF(5); - MF(6); -#if 0 - MF(7); - MF(8); - MF(9); - MF(10); - MF(11); - MF(12); -#endif /* 0 */ - *abuf++ = a; - *bbuf++ = a ^ p; - length--; - } -} -/* - Lost parity and a data column. Recover that data column. - Assume col coeff is lost. Let q the contents of Q after - all surviving data columns have been q-xored out of it. - Then we have the equation - - q[28-coeff][a_i ^ r_i+1] = q - - but q is cyclic with period 31. - So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = - q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . - - so a_i = r_{coeff+1} ^ q[3+coeff][q] - - The routine is passed q buffer and the buffer - the data is to be recoverd into. They can be the same. 
-*/ - - - -static void -rf_InvertQ( - unsigned long *qbuf, - unsigned long *abuf, - unsigned length, - unsigned coeff) -{ - unsigned long a, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[3 + coeff][0]); - unsigned r = rf_rn[coeff + 1]; - - /* 13 5 bit quants in a 64 bit word */ - length /= 8; - while (length) { - a = *qbuf++; - a1 = EXTRACT(a, 0); - a2 = EXTRACT(a, 1); - a1 = r ^ q[a1]; - a2 = r ^ q[a2]; - new = INSERT(a2, 1) | a1; -#define M(i,j) \ - a1 = EXTRACT(a,i); \ - a2 = EXTRACT(a,j); \ - a1 = r ^ q[a1]; \ - a2 = r ^ q[a2]; \ - new = new | INSERT(a1,i) | INSERT(a2,j) - - M(2, 3); - M(4, 5); - M(5, 6); -#if RF_LONGSHIFT > 2 - M(7, 8); - M(9, 10); - M(11, 12); -#endif /* RF_LONGSHIFT > 2 */ - *abuf++ = new; - length--; - } -} -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pq.h b/sys/dev/raidframe/rf_pq.h deleted file mode 100644 index 9a2ce23..0000000 --- a/sys/dev/raidframe/rf_pq.h +++ /dev/null @@ -1,75 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_pq.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * rf_pq.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_PQ_H_ -#define _RF__RF_PQ_H_ - -#include <dev/raidframe/rf_archs.h> - -extern RF_RedFuncs_t rf_pFuncs; -extern RF_RedFuncs_t rf_pRecoveryFuncs; - -int rf_RegularONPFunc(RF_DagNode_t * node); -int rf_SimpleONPFunc(RF_DagNode_t * node); -int rf_RecoveryPFunc(RF_DagNode_t * node); -int rf_RegularPFunc(RF_DagNode_t * node); - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -extern RF_RedFuncs_t rf_qFuncs; -extern RF_RedFuncs_t rf_qRecoveryFuncs; -extern RF_RedFuncs_t rf_pqRecoveryFuncs; - -void -rf_PQDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG); -int rf_RegularONQFunc(RF_DagNode_t * node); -int rf_SimpleONQFunc(RF_DagNode_t * node); -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG); -int rf_RegularPQFunc(RF_DagNode_t * node); -int rf_RegularQFunc(RF_DagNode_t * node); -void rf_Degraded_100_PQFunc(RF_DagNode_t * node); -int rf_RecoveryQFunc(RF_DagNode_t * node); -int rf_RecoveryPQFunc(RF_DagNode_t * node); -void rf_PQ_DegradedWriteQFunc(RF_DagNode_t * node); -void -rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, - unsigned coeff); -void -rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, - unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b); - -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ - -#endif /* !_RF__RF_PQ_H_ */ diff --git a/sys/dev/raidframe/rf_pqdeg.c b/sys/dev/raidframe/rf_pqdeg.c deleted file mode 100644 index 0d3356c..0000000 --- a/sys/dev/raidframe/rf_pqdeg.c +++ 
/dev/null @@ -1,219 +0,0 @@ -/* $NetBSD: rf_pqdeg.c,v 1.5 2000/01/07 03:41:04 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_archs.h> - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_pqdeg.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_pqdegdags.h> -#include <dev/raidframe/rf_pq.h> - -/* - Degraded mode dag functions for P+Q calculations. - - The following nomenclature is used. 
- - PQ_<D><P><Q>_Create{Large,Small}<Write|Read>DAG - - where <D><P><Q> are single digits representing the number of failed - data units <D> (0,1,2), parity units <P> (0,1), and Q units <Q>, effecting - the I/O. The reads have only PQ_<D><P><Q>_CreateReadDAG variants, while - the single fault writes have both large and small write versions. (Single fault - PQ is equivalent to normal mode raid 5 in many aspects. - - Some versions degenerate into the same case, and are grouped together below. -*/ - -/* Reads, single failure - - we have parity, so we can do a raid 5 - reconstruct read. -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); -} -/* Reads double failure */ - -/* - Q is lost, but not parity - so we can a raid 5 reconstruct read. -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); -} -/* - parity is lost, so we need to - do a reconstruct read and recompute - the data with Q. -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_qRecoveryFuncs); -} -/* - Two data units are dead in this stripe, so we will need read - both P and Q to reconstruct the data. Note that only - one data unit we are reading may actually be missing. 
-*/ -RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG) -{ - rf_PQ_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG) -{ - rf_CreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); -} -/* Writes, single failure */ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != - raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, - flags, allocList, 2, - (int (*) (RF_DagNode_t *)) rf_Degraded_100_PQFunc, - RF_FALSE); -} -/* Dead P - act like a RAID 5 small write with parity = Q */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, &rf_qFuncs, NULL); -} -/* Dead Q - act like a RAID 5 small write */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, &rf_pFuncs, NULL); -} -/* Dead P - act like a RAID 5 large write but for Q */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, rf_RegularQFunc, RF_FALSE); -} -/* Dead Q - act like a RAID 5 large write */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, 
rf_RegularPFunc, RF_FALSE); -} - - -/* - * writes, double failure - */ - -/* - * Lost P & Q - do a nonredundant write - */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG) -{ - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} -/* - In the two cases below, - A nasty case arises when the write a (strict) portion of a failed stripe unit - and parts of another su. For now, we do not support this. -*/ - -/* - Lost Data and P - do a Q write. -*/ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { - RF_PANIC(); - } - /* swap P and Q to fake out parity code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, - (int (*) (RF_DagNode_t *)) rf_PQ_DegradedWriteQFunc, - RF_FALSE); - /* is the regular Q func the right one to call? */ -} -/* - Lost Data and Q - do degraded mode P write -*/ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, rf_RecoveryXorFunc, RF_FALSE); -} -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdeg.h b/sys/dev/raidframe/rf_pqdeg.h deleted file mode 100644 index 83371e6..0000000 --- a/sys/dev/raidframe/rf_pqdeg.h +++ /dev/null @@ -1,75 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_pqdeg.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_PQDEG_H_ -#define _RF__RF_PQDEG_H_ - -#include <dev/raidframe/rf_types.h> - -#if RF_UTILITY == 0 -#include <dev/raidframe/rf_dag.h> - -/* extern decl's of the failure mode PQ functions. - * See pddeg.c for nomenclature discussion. 
- */ - -/* reads, single failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG); -/* reads, two failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG); - -/* writes, single failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG); - -/* writes, double failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ - -typedef RF_uint32 RF_ua32_t[32]; -typedef RF_uint8 RF_ua1024_t[1024]; - -extern RF_ua32_t rf_rn; -extern RF_ua32_t rf_qfor[32]; -#ifndef _KERNEL /* we don't support PQ in the kernel yet, so - * don't link in this monster table */ -extern RF_ua1024_t rf_qinv[29 * 29]; -#else /* !_KERNEL */ -extern RF_ua1024_t rf_qinv[1]; -#endif /* !_KERNEL */ - -#endif /* !_RF__RF_PQDEG_H_ */ diff --git a/sys/dev/raidframe/rf_pqdegdags.c b/sys/dev/raidframe/rf_pqdegdags.c deleted file mode 100644 index 3606005..0000000 --- a/sys/dev/raidframe/rf_pqdegdags.c +++ /dev/null @@ -1,432 +0,0 @@ -/* $NetBSD: rf_pqdegdags.c,v 1.5 1999/08/15 02:36:40 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_pqdegdags.c - * Degraded mode dags for double fault cases. -*/ - - -#include <dev/raidframe/rf_archs.h> - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_pqdegdags.h> -#include <dev/raidframe/rf_pq.h> - -static void -applyPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, RF_PhysDiskAddr_t * ppda, - RF_PhysDiskAddr_t * qpda, void *bp); - -/* - Two data drives have failed, and we are doing a read that covers one of them. - We may also be reading some of the surviving drives. 
- - - ***************************************************************************************** - * - * creates a DAG to perform a degraded-mode read of data within one stripe. - * This DAG is as follows: - * - * Hdr - * | - * Block - * / / \ \ \ \ - * Rud ... Rud Rrd ... Rrd Rp Rq - * | \ | \ | \ | \ | \ | \ - * - * | | - * Unblock X - * \ / - * ------ T ------ - * - * Each R node is a successor of the L node - * One successor arc from each R node goes to U, and the other to X - * There is one Rud for each chunk of surviving user data requested by the user, - * and one Rrd for each chunk of surviving user data _not_ being read by the user - * R = read, ud = user data, rd = recovery (surviving) data, p = P data, q = Qdata - * X = pq recovery node, T = terminate - * - * The block & unblock nodes are leftovers from a previous version. They - * do nothing, but I haven't deleted them because it would be a tremendous - * effort to put them back in. - * - * Note: The target buffer for the XOR node is set to the actual user buffer where the - * failed data is supposed to end up. This buffer is zero'd by the code here. Thus, - * if you create a degraded read dag, use it, and then re-use, you have to be sure to - * zero the target buffer prior to the re-use. - * - * Every buffer read is passed to the pq recovery node, whose job it is to sort out whats - * needs and what's not. 
- ****************************************************************************************/ -/* init a disk node with 2 successors and one predecessor */ -#define INIT_DISK_NODE(node,name) \ -rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ -(node)->succedents[0] = unblockNode; \ -(node)->succedents[1] = recoveryNode; \ -(node)->antecedents[0] = blockNode; \ -(node)->antType[0] = rf_control - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -#define DISK_NODE_PDA(node) ((node)->params[0].p) - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead) -{ - rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, - "Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc); -} - -static void -applyPDA(raidPtr, pda, ppda, qpda, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - RF_PhysDiskAddr_t *ppda; - RF_PhysDiskAddr_t *qpda; - void *bp; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector); - RF_SectorCount_t s0len = ppda->numSector, len; - RF_SectorNum_t suoffset; - unsigned coeff; - char *pbuf = ppda->bufPtr; - char *qbuf = qpda->bufPtr; - char *buf; - int delta; - - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - len = pda->numSector; - /* see if pda intersects a recovery pda */ - if ((suoffset < s0off + s0len) && (suoffset + len > s0off)) { - buf = pda->bufPtr; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); - coeff = (coeff % raidPtr->Layout.numDataCol); - - if (suoffset < s0off) { - delta = s0off - suoffset; - buf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); - suoffset = s0off; - len -= delta; - } - if (suoffset > s0off) { - delta = suoffset - s0off; - pbuf += 
rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); - qbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); - } - if ((suoffset + len) > (s0len + s0off)) - len = s0len + s0off - suoffset; - - /* src, dest, len */ - rf_bxor(buf, pbuf, rf_RaidAddressToByte(raidPtr, len), bp); - - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf, rf_RaidAddressToByte(raidPtr, len), coeff); - } -} -/* - Recover data in the case of a double failure. There can be two - result buffers, one for each chunk of data trying to be recovered. - The params are pda's that have not been range restricted or otherwise - politely massaged - this should be done here. The last params are the - pdas of P and Q, followed by the raidPtr. The list can look like - - pda, pda, ... , p pda, q pda, raidptr, asm - - or - - pda, pda, ... , p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm - - depending on wether two chunks of recovery data were required. - - The second condition only arises if there are two failed buffers - whose lengths do not add up a stripe unit. 
-*/ - - -int -rf_PQDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int d, i; - unsigned coeff; - RF_RaidAddr_t sosAddr, suoffset; - RF_SectorCount_t len, secPerSU = layoutPtr->sectorsPerStripeUnit; - int two = 0; - RF_PhysDiskAddr_t *ppda, *ppda2, *qpda, *qpda2, *pda, npda; - char *buf; - int numDataCol = layoutPtr->numDataCol; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ETIMER_START(timer); - - if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { - RF_ASSERT(0); - ppda = node->params[np - 6].p; - ppda2 = node->params[np - 5].p; - qpda = node->params[np - 4].p; - qpda2 = node->params[np - 3].p; - d = (np - 6); - two = 1; - } else { - ppda = node->params[np - 4].p; - qpda = node->params[np - 3].p; - d = (np - 4); - } - - for (i = 0; i < d; i++) { - pda = node->params[i].p; - buf = pda->bufPtr; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - len = pda->numSector; - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* see if pda intersects a recovery pda */ - applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); - if (two) - applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); - } - - /* ok, we got the parity back to the point where we can recover. We - * now need to determine the coeff of the columns that need to be - * recovered. We can also only need to recover a single stripe unit. */ - - if (asmap->failedPDAs[1] == NULL) { /* only a single stripe unit - * to recover. 
*/ - pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != coeff) - break; - } - RF_ASSERT(i < numDataCol); - RF_ASSERT(two == 0); - /* recover the data. Since we need only want to recover one - * column, we overwrite the parity with the other one. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) pda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) pda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); - } else - RF_PANIC(); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); - return (0); -} - -int -rf_PQWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - /* The situation: - * - * We are doing a write that hits only one failed data unit. The other - * failed data unit is not being overwritten, so we need to generate - * it. - * - * For the moment, we assume all the nonfailed data being written is in - * the shadow of the failed data unit. (i.e,, either a single data - * unit write or the entire failed stripe unit is being overwritten. 
) - * - * Recovery strategy: apply the recovery data to the parity and q. Use P - * & Q to recover the second failed data unit in P. Zero fill Q, then - * apply the recovered data to p. Then apply the data being written to - * the failed drive. Then walk through the surviving drives, applying - * new data when it exists, othewise the recovery data. Quite a mess. - * - * - * The params - * - * read pda0, read pda1, ... read pda (numDataCol-3), write pda0, ... , - * write pda (numStripeUnitAccess - numDataFailed), failed pda, - * raidPtr, asmap */ - - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i; - RF_RaidAddr_t sosAddr; - unsigned coeff; - RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda; - int numDataCol = layoutPtr->numDataCol; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ASSERT(node->numResults == 2); - RF_ASSERT(asmap->failedPDAs[1] == NULL); - RF_ETIMER_START(timer); - ppda = node->results[0]; - qpda = node->results[1]; - /* apply the recovery data */ - for (i = 0; i < numDataCol - 2; i++) - applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); - - /* determine the other failed data unit */ - pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - 
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != coeff) - break; - } - RF_ASSERT(i < numDataCol); - /* recover the data. The column we want to recover we write over the - * parity. The column we don't care about we dump in q. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); - - /* OK. The valid data is in P. Zero fill Q, then inc it into it. */ - bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector)); - rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), i); - - /* now apply all the write data to the buffer */ - /* single stripe unit write case: the failed data is only thing we are - * writing. */ - RF_ASSERT(asmap->numStripeUnitsAccessed == 1); - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) asmap->failedPDAs[0]->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff); - rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr, rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp); - - /* now apply all the recovery data */ - for (i = 0; i < numDataCol - 2; i++) - applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) - tracerec->q_us += RF_ETIMER_VAL_US(timer); - - rf_GenericWakeupFunc(node, 0); - return (0); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite) -{ - RF_PANIC(); -} -/* - Two lost data unit write case. - - There are really two cases here: - - (1) The write completely covers the two lost data units. 
- In that case, a reconstruct write that doesn't write the - failed data units will do the correct thing. So in this case, - the dag looks like - - full stripe read of surviving data units (not being overwriten) - write new data (ignoring failed units) compute P&Q - write P&Q - - - (2) The write does not completely cover both failed data units - (but touches at least one of them). Then we need to do the - equivalent of a reconstruct read to recover the missing data - unit from the other stripe. - - For any data we are writing that is not in the "shadow" - of the failed units, we need to do a four cycle update. - PANIC on this case. for now - -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_SectorCount_t sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - int sum; - int nf = asmap->numDataFailed; - - sum = asmap->failedPDAs[0]->numSector; - if (nf == 2) - sum += asmap->failedPDAs[1]->numSector; - - if ((nf == 2) && (sum == (2 * sectorsPerSU))) { - /* large write case */ - rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList); - return; - } - if ((nf == asmap->numStripeUnitsAccessed) || (sum >= sectorsPerSU)) { - /* small write case, no user data not in shadow */ - rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList); - return; - } - RF_PANIC(); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite) -{ - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc); -} -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdegdags.h b/sys/dev/raidframe/rf_pqdegdags.h deleted file mode 100644 index 11ce820..0000000 --- a/sys/dev/raidframe/rf_pqdegdags.h +++ /dev/null @@ -1,49 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_pqdegdags.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * rf_pqdegdags.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. 
- * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * rf_pqdegdags.c - * Degraded mode dags for double fault cases. - */ - -#ifndef _RF__RF_PQDEGDAGS_H_ -#define _RF__RF_PQDEGDAGS_H_ - -#include <dev/raidframe/rf_dag.h> - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead); -int rf_PQDoubleRecoveryFunc(RF_DagNode_t * node); -int rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t * node); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); - -#endif /* !_RF__RF_PQDEGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_psstatus.c b/sys/dev/raidframe/rf_psstatus.c deleted file mode 100644 index a6968cf..0000000 --- a/sys/dev/raidframe/rf_psstatus.c +++ /dev/null @@ -1,378 +0,0 @@ -/* $NetBSD: rf_psstatus.c,v 1.5 2000/01/08 22:57:31 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * psstatus.c - * - * The reconstruction code maintains a bunch of status related to the parity - * stripes that are currently under reconstruction. This header file defines - * the status structures. 
- * - *****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_psstatus.h> -#include <dev/raidframe/rf_shutdown.h> - -#define Dprintf1(s,a) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) - -static void -RealPrintPSStatusTable(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable); - -#define RF_MAX_FREE_PSS 32 -#define RF_PSS_INC 8 -#define RF_PSS_INITIAL 4 - -static int init_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); -static void clean_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); -static void rf_ShutdownPSStatus(void *); - -static int -init_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; -{ - RF_Calloc(p->issued, raidPtr->numCol, sizeof(char), (char *)); - if (p->issued == NULL) - return (ENOMEM); - return (0); -} - -static void -clean_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; -{ - RF_Free(p->issued, raidPtr->numCol * sizeof(char)); -} - -static void -rf_ShutdownPSStatus(arg) - void *arg; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) arg; - - RF_FREELIST_DESTROY_CLEAN_ARG(raidPtr->pss_freelist, next, (RF_ReconParityStripeStatus_t *), clean_pss, raidPtr); -} - -int -rf_ConfigurePSStatus( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - raidPtr->pssTableSize = RF_PSS_DEFAULT_TABLESIZE; - RF_FREELIST_CREATE(raidPtr->pss_freelist, RF_MAX_FREE_PSS, - RF_PSS_INC, 
sizeof(RF_ReconParityStripeStatus_t)); - if (raidPtr->pss_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownPSStatus, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownPSStatus(raidPtr); - return (rc); - } - RF_FREELIST_PRIME_INIT_ARG(raidPtr->pss_freelist, RF_PSS_INITIAL, next, - (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); - return (0); -} -/***************************************************************************************** - * sets up the pss table - * We pre-allocate a bunch of entries to avoid as much as possible having to - * malloc up hash chain entries. - ****************************************************************************************/ -RF_PSStatusHeader_t * -rf_MakeParityStripeStatusTable(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_PSStatusHeader_t *pssTable; - int i, j, rc; - - RF_Calloc(pssTable, raidPtr->pssTableSize, sizeof(RF_PSStatusHeader_t), (RF_PSStatusHeader_t *)); - for (i = 0; i < raidPtr->pssTableSize; i++) { - rc = rf_mutex_init(&pssTable[i].mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - /* fail and deallocate */ - for (j = 0; j < i; j++) { - rf_mutex_destroy(&pssTable[i].mutex); - } - RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); - return (NULL); - } - } - return (pssTable); -} - -void -rf_FreeParityStripeStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; -{ - int i; - - if (rf_pssDebug) - RealPrintPSStatusTable(raidPtr, pssTable); - for (i = 0; i < raidPtr->pssTableSize; i++) { - if (pssTable[i].chain) { - printf("ERROR: pss hash chain not null at recon shutdown\n"); - } - rf_mutex_destroy(&pssTable[i].mutex); - } - RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); -} - - -/* looks up the status structure for a parity stripe. 
- * if the create_flag is on, creates and returns the status structure it it doesn't exist - * otherwise returns NULL if the status structure does not exist - * - * ASSUMES THE PSS DESCRIPTOR IS LOCKED UPON ENTRY - */ -RF_ReconParityStripeStatus_t * -rf_LookupRUStatus( - RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable, - RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, - RF_PSSFlags_t flags, /* whether or not to create it if it doesn't - * exist + what flags to set initially */ - int *created) -{ - RF_PSStatusHeader_t *hdr = &pssTable[RF_HASH_PSID(raidPtr, psID)]; - RF_ReconParityStripeStatus_t *p, *pssPtr = hdr->chain; - - *created = 0; - for (p = pssPtr; p; p = p->next) { - if (p->parityStripeID == psID && p->which_ru == which_ru) - break; - } - - if (!p && (flags & RF_PSS_CREATE)) { - Dprintf2("PSS: creating pss for psid %ld ru %d\n", psID, which_ru); - p = rf_AllocPSStatus(raidPtr); - p->next = hdr->chain; - hdr->chain = p; - - p->parityStripeID = psID; - p->which_ru = which_ru; - p->flags = flags; - p->rbuf = NULL; - p->writeRbuf = NULL; - p->blockCount = 0; - p->procWaitList = NULL; - p->blockWaitList = NULL; - p->bufWaitList = NULL; - *created = 1; - } else - if (p) { /* we didn't create, but we want to specify - * some new status */ - p->flags |= flags; /* add in whatever flags we're - * specifying */ - } - if (p && (flags & RF_PSS_RECON_BLOCKED)) { - p->blockCount++;/* if we're asking to block recon, bump the - * count */ - Dprintf3("raid%d: Blocked recon on psid %ld. count now %d\n", - raidPtr->raidid, psID, p->blockCount); - } - return (p); -} -/* deletes an entry from the parity stripe status table. typically used - * when an entry has been allocated solely to block reconstruction, and - * no recon was requested while recon was blocked. Assumes the hash - * chain is ALREADY LOCKED. 
- */ -void -rf_PSStatusDelete(raidPtr, pssTable, pssPtr) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; - RF_ReconParityStripeStatus_t *pssPtr; -{ - RF_PSStatusHeader_t *hdr = &(pssTable[RF_HASH_PSID(raidPtr, pssPtr->parityStripeID)]); - RF_ReconParityStripeStatus_t *p = hdr->chain, *pt = NULL; - - while (p) { - if (p == pssPtr) { - if (pt) - pt->next = p->next; - else - hdr->chain = p->next; - p->next = NULL; - rf_FreePSStatus(raidPtr, p); - return; - } - pt = p; - p = p->next; - } - RF_ASSERT(0); /* we must find it here */ -} -/* deletes an entry from the ps status table after reconstruction has completed */ -void -rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psid; -{ - RF_PSStatusHeader_t *hdr = &(raidPtr->reconControl[row]->pssTable[RF_HASH_PSID(raidPtr, psid)]); - RF_ReconParityStripeStatus_t *p, *pt; - RF_CallbackDesc_t *cb, *cb1; - - RF_LOCK_MUTEX(hdr->mutex); - for (pt = NULL, p = hdr->chain; p; pt = p, p = p->next) { - if ((p->parityStripeID == psid) && (p->which_ru == which_ru)) - break; - } - if (p == NULL) { - rf_PrintPSStatusTable(raidPtr, row); - } - RF_ASSERT(p); /* it must be there */ - - Dprintf2("PSS: deleting pss for psid %ld ru %d\n", psid, which_ru); - - /* delete this entry from the hash chain */ - if (pt) - pt->next = p->next; - else - hdr->chain = p->next; - p->next = NULL; - - RF_UNLOCK_MUTEX(hdr->mutex); - - /* wakup anyone waiting on the parity stripe ID */ - cb = p->procWaitList; - p->procWaitList = NULL; - while (cb) { - Dprintf1("Waking up access waiting on parity stripe ID %ld\n", p->parityStripeID); - cb1 = cb->next; - (cb->callbackFunc) (cb->callbackArg); - - /* THIS IS WHAT THE ORIGINAL CODE HAD... 
the extra 0 is bogus, - * IMHO */ - /* (cb->callbackFunc)(cb->callbackArg, 0); */ - rf_FreeCallbackDesc(cb); - cb = cb1; - } - - rf_FreePSStatus(raidPtr, p); -} - -RF_ReconParityStripeStatus_t * -rf_AllocPSStatus(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ReconParityStripeStatus_t *p; - - RF_FREELIST_GET_INIT_ARG(raidPtr->pss_freelist, p, next, (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); - if (p) { - bzero(p->issued, raidPtr->numCol); - } - p->next = NULL; - /* no need to initialize here b/c the only place we're called from is - * the above Lookup */ - return (p); -} - -void -rf_FreePSStatus(raidPtr, p) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *p; -{ - RF_ASSERT(p->procWaitList == NULL); - RF_ASSERT(p->blockWaitList == NULL); - RF_ASSERT(p->bufWaitList == NULL); - - RF_FREELIST_FREE_CLEAN_ARG(raidPtr->pss_freelist, p, next, clean_pss, raidPtr); -} - -static void -RealPrintPSStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; -{ - int i, j, procsWaiting, blocksWaiting, bufsWaiting; - RF_ReconParityStripeStatus_t *p; - RF_CallbackDesc_t *cb; - - printf("\nParity Stripe Status Table\n"); - for (i = 0; i < raidPtr->pssTableSize; i++) { - for (p = pssTable[i].chain; p; p = p->next) { - procsWaiting = blocksWaiting = bufsWaiting = 0; - for (cb = p->procWaitList; cb; cb = cb->next) - procsWaiting++; - for (cb = p->blockWaitList; cb; cb = cb->next) - blocksWaiting++; - for (cb = p->bufWaitList; cb; cb = cb->next) - bufsWaiting++; - printf("PSID %ld RU %d : blockCount %d %d/%d/%d proc/block/buf waiting, issued ", - (long) p->parityStripeID, p->which_ru, p->blockCount, procsWaiting, blocksWaiting, bufsWaiting); - for (j = 0; j < raidPtr->numCol; j++) - printf("%c", (p->issued[j]) ? 
'1' : '0'); - if (!p->flags) - printf(" flags: (none)"); - else { - if (p->flags & RF_PSS_UNDER_RECON) - printf(" under-recon"); - if (p->flags & RF_PSS_FORCED_ON_WRITE) - printf(" forced-w"); - if (p->flags & RF_PSS_FORCED_ON_READ) - printf(" forced-r"); - if (p->flags & RF_PSS_RECON_BLOCKED) - printf(" blocked"); - if (p->flags & RF_PSS_BUFFERWAIT) - printf(" bufwait"); - } - printf("\n"); - } - } -} - -void -rf_PrintPSStatusTable(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; -{ - RF_PSStatusHeader_t *pssTable = raidPtr->reconControl[row]->pssTable; - RealPrintPSStatusTable(raidPtr, pssTable); -} diff --git a/sys/dev/raidframe/rf_psstatus.h b/sys/dev/raidframe/rf_psstatus.h deleted file mode 100644 index c836d49..0000000 --- a/sys/dev/raidframe/rf_psstatus.h +++ /dev/null @@ -1,132 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_psstatus.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/***************************************************************************** - * - * psstatus.h - * - * The reconstruction code maintains a bunch of status related to the parity - * stripes that are currently under reconstruction. This header file defines - * the status structures. - * - *****************************************************************************/ - -#ifndef _RF__RF_PSSTATUS_H_ -#define _RF__RF_PSSTATUS_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_callback.h> - -#define RF_PS_MAX_BUFS 10 /* max number of bufs we'll accumulate before - * we do an XOR */ - -#define RF_PSS_DEFAULT_TABLESIZE 200 - -/* - * Macros to acquire/release the mutex lock on a parity stripe status - * descriptor. Note that we use just one lock for the whole hash chain. - */ -#define RF_HASH_PSID(_raid_,_psid_) ( (_psid_) % ((_raid_)->pssTableSize) ) /* simple hash function */ -#define RF_LOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ - RF_LOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) -#define RF_UNLOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ - RF_UNLOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) - -struct RF_ReconParityStripeStatus_s { - RF_StripeNum_t parityStripeID; /* the parity stripe ID */ - RF_ReconUnitNum_t which_ru; /* which reconstruction unit with the - * indicated parity stripe */ - RF_PSSFlags_t flags; /* flags indicating various conditions */ - void *rbuf; /* this is the accumulating xor sum */ - void *writeRbuf; /* DEBUG ONLY: a pointer to the rbuf after it - * has filled & been sent to disk */ - void *rbufsForXor[RF_PS_MAX_BUFS]; /* these are buffers still to - * be xored into the - * accumulating sum */ - int xorBufCount; /* num buffers waiting to be xored */ - int blockCount; /* count of # proc that have blocked recon on - * this parity stripe */ - char *issued; /* issued[i]==1 <=> column i has already - * 
issued a read request for the indicated RU */ - RF_CallbackDesc_t *procWaitList; /* list of user procs waiting - * for recon to be done */ - RF_CallbackDesc_t *blockWaitList; /* list of disks blocked - * waiting for user write to - * complete */ - RF_CallbackDesc_t *bufWaitList; /* list of disks blocked waiting to - * acquire a buffer for this RU */ - RF_ReconParityStripeStatus_t *next; -}; - -struct RF_PSStatusHeader_s { - RF_DECLARE_MUTEX(mutex) /* mutex for this hash chain */ - RF_ReconParityStripeStatus_t *chain; /* the hash chain */ -}; -/* masks for the "flags" field above */ -#define RF_PSS_NONE 0x00000000 /* no flags */ -#define RF_PSS_UNDER_RECON 0x00000001 /* this parity stripe is - * currently under - * reconstruction */ -#define RF_PSS_FORCED_ON_WRITE 0x00000002 /* indicates a recon was - * forced due to a user-write - * operation */ -#define RF_PSS_FORCED_ON_READ 0x00000004 /* ditto for read, but not - * currently implemented */ -#define RF_PSS_RECON_BLOCKED 0x00000008 /* reconstruction is currently - * blocked due to a pending - * user I/O */ -#define RF_PSS_CREATE 0x00000010 /* tells LookupRUStatus to - * create the entry */ -#define RF_PSS_BUFFERWAIT 0x00000020 /* someone is waiting for a - * buffer for this RU */ - -int -rf_ConfigurePSStatus(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); - -RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(RF_Raid_t * raidPtr); -void -rf_FreeParityStripeStatusTable(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable); -RF_ReconParityStripeStatus_t * -rf_LookupRUStatus(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable, RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, RF_PSSFlags_t flags, int *created); -void -rf_PSStatusDelete(RF_Raid_t * raidPtr, RF_PSStatusHeader_t * pssTable, - RF_ReconParityStripeStatus_t * pssPtr); -void -rf_RemoveFromActiveReconTable(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru); -RF_ReconParityStripeStatus_t 
*rf_AllocPSStatus(RF_Raid_t * raidPtr); -void rf_FreePSStatus(RF_Raid_t * raidPtr, RF_ReconParityStripeStatus_t * p); -void rf_PrintPSStatusTable(RF_Raid_t * raidPtr, RF_RowCol_t row); - -#endif /* !_RF__RF_PSSTATUS_H_ */ diff --git a/sys/dev/raidframe/rf_raid.h b/sys/dev/raidframe/rf_raid.h deleted file mode 100644 index e91a2ae..0000000 --- a/sys/dev/raidframe/rf_raid.h +++ /dev/null @@ -1,299 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/********************************************** - * rf_raid.h -- main header file for RAID driver - **********************************************/ - - -#ifndef _RF__RF_RAID_H_ -#define _RF__RF_RAID_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -#include <dev/raidframe/rf_bsd.h> - -#include <sys/disklabel.h> -#include <sys/types.h> - -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_disks.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_reconstruct.h> -#include <dev/raidframe/rf_acctrace.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 -#include <dev/raidframe/rf_paritylog.h> -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -#define RF_MAX_DISKS 128 /* max disks per array */ -#if defined(__NetBSD__) -#define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev)) -#endif - -#define RF_COMPONENT_LABEL_VERSION_1 1 -#define RF_COMPONENT_LABEL_VERSION 2 -#define RF_RAID_DIRTY 0 -#define RF_RAID_CLEAN 1 - - -/* - * Each row in the array is a distinct parity group, so - * each has it's own status, which is one of the following. 
- */ -typedef enum RF_RowStatus_e { - rf_rs_optimal, - rf_rs_degraded, - rf_rs_reconstructing, - rf_rs_reconfigured -} RF_RowStatus_t; - -struct RF_CumulativeStats_s { - struct timeval start; /* the time when the stats were last started */ - struct timeval stop; /* the time when the stats were last stopped */ - long sum_io_us; /* sum of all user response times (us) */ - long num_ios; /* total number of I/Os serviced */ - long num_sect_moved; /* total number of sectors read or written */ -}; - -struct RF_ThroughputStats_s { - RF_DECLARE_MUTEX(mutex) /* a mutex used to lock the configuration - * stuff */ - struct timeval start; /* timer started when numOutstandingRequests - * moves from 0 to 1 */ - struct timeval stop; /* timer stopped when numOutstandingRequests - * moves from 1 to 0 */ - RF_uint64 sum_io_us; /* total time timer is enabled */ - RF_uint64 num_ios; /* total number of ios processed by RAIDframe */ - long num_out_ios; /* number of outstanding ios */ -}; - -struct RF_Raid_s { - /* This portion never changes, and can be accessed without locking */ - /* an exception is Disks[][].status, which requires locking when it is - * changed. XXX this is no longer true. numSpare and friends can - * change now. 
- */ - u_int numRow; /* number of rows of disks, typically == # of - * ranks */ - u_int numCol; /* number of columns of disks, typically == # - * of disks/rank */ - u_int numSpare; /* number of spare disks */ - int maxQueueDepth; /* max disk queue depth */ - RF_SectorCount_t totalSectors; /* total number of sectors in the - * array */ - RF_SectorCount_t sectorsPerDisk; /* number of sectors on each - * disk */ - u_int logBytesPerSector; /* base-2 log of the number of bytes - * in a sector */ - u_int bytesPerSector; /* bytes in a sector */ - RF_int32 sectorMask; /* mask of bytes-per-sector */ - - RF_RaidLayout_t Layout; /* all information related to layout */ - RF_RaidDisk_t **Disks; /* all information related to physical disks */ - RF_DiskQueue_t **Queues;/* all information related to disk queues */ - RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the - component queues. */ - /* NOTE: This is an anchor point via which the queues can be - * accessed, but the enqueue/dequeue routines in diskqueue.c use a - * local copy of this pointer for the actual accesses. */ - /* The remainder of the structure can change, and therefore requires - * locking on reads and updates */ - RF_DECLARE_MUTEX(mutex) /* mutex used to serialize access to - * the fields below */ - RF_RowStatus_t *status; /* the status of each row in the array */ - int valid; /* indicates successful configuration */ - RF_LockTableEntry_t *lockTable; /* stripe-lock table */ - RF_LockTableEntry_t *quiesceLock; /* quiescence table */ - int numFailures; /* total number of failures in the array */ - int numNewFailures; /* number of *new* failures (that haven't - caused a mod_counter update) */ - - int parity_good; /* !0 if parity is known to be correct */ - int serial_number; /* a "serial number" for this set */ - int mod_counter; /* modification counter for component labels */ - int clean; /* the clean bit for this array. 
*/ - - int openings; /* Number of IO's which can be scheduled - simultaneously (high-level - not a - per-component limit)*/ - - int maxOutstanding; /* maxOutstanding requests (per-component) */ - int autoconfigure; /* automatically configure this RAID set. - 0 == no, 1 == yes */ - int root_partition; /* Use this set as / - 0 == no, 1 == yes*/ - int last_unit; /* last unit number (e.g. 0 for /dev/raid0) - of this component. Used for autoconfigure - only. */ - int config_order; /* 0 .. n. The order in which the component - should be auto-configured. E.g. 0 will be - done first (and would become raid0). - This may be in conflict with last_unit!!?! */ - /* Not currently used. */ - - /* - * Cleanup stuff - */ - RF_ShutdownList_t *shutdownList; /* shutdown activities */ - RF_AllocListElem_t *cleanupList; /* memory to be freed at - * shutdown time */ - - /* - * Recon stuff - */ - RF_HeadSepLimit_t headSepLimit; - int numFloatingReconBufs; - int reconInProgress; - RF_DECLARE_COND(waitForReconCond) - RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */ - RF_ReconCtrl_t **reconControl; /* reconstruction control structure - * pointers for each row in the array */ - - /* - * Array-quiescence stuff - */ - RF_DECLARE_MUTEX(access_suspend_mutex) - RF_DECLARE_COND(quiescent_cond) - RF_IoCount_t accesses_suspended; - RF_IoCount_t accs_in_flight; - int access_suspend_release; - int waiting_for_quiescence; - RF_CallbackDesc_t *quiesce_wait_list; - - /* - * Statistics - */ -#if !defined(_KERNEL) && !defined(SIMULATE) - RF_ThroughputStats_t throughputstats; -#endif /* !KERNEL && !SIMULATE */ - RF_CumulativeStats_t userstats; - int parity_rewrite_stripes_done; - int recon_stripes_done; - int copyback_stripes_done; - - int recon_in_progress; - int parity_rewrite_in_progress; - int copyback_in_progress; - - /* - * Engine thread control - */ - RF_DECLARE_MUTEX(node_queue_mutex) - RF_DECLARE_COND(node_queue_cond) - RF_DagNode_t *node_queue; - RF_Thread_t parity_rewrite_thread; - 
RF_Thread_t copyback_thread; - RF_Thread_t engine_thread; - RF_Thread_t recon_thread; - RF_ThreadGroup_t engine_tg; - int shutdown_engine; - int dags_in_flight; /* debug */ - - /* - * PSS (Parity Stripe Status) stuff - */ - RF_FreeList_t *pss_freelist; - long pssTableSize; - - /* - * Reconstruction stuff - */ - int procsInBufWait; - int numFullReconBuffers; - RF_AccTraceEntry_t *recon_tracerecs; - unsigned long accumXorTimeUs; - RF_ReconDoneProc_t *recon_done_procs; - RF_DECLARE_MUTEX(recon_done_proc_mutex) - /* - * nAccOutstanding, waitShutdown protected by desc freelist lock - * (This may seem strange, since that's a central serialization point - * for a per-array piece of data, but otherwise, it'd be an extra - * per-array lock, and that'd only be less efficient...) - */ - RF_DECLARE_COND(outstandingCond) - int waitShutdown; - int nAccOutstanding; - - RF_DiskId_t **diskids; - RF_DiskId_t *sparediskids; - - int raidid; - RF_AccTotals_t acc_totals; - int keep_acc_totals; - - struct raidcinfo **raid_cinfo; /* array of component info */ - - int terminate_disk_queues; - - /* - * XXX - * - * config-specific information should be moved - * somewhere else, or at least hung off this - * in some generic way - */ - - /* used by rf_compute_workload_shift */ - RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; - - /* used by declustering */ - int noRotate; - -#if RF_INCLUDE_PARITYLOGGING > 0 - /* used by parity logging */ - RF_SectorCount_t regionLogCapacity; - RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */ - RF_RegionInfo_t *regionInfo; /* array of region state */ - int numParityLogs; - int numSectorsPerLog; - int regionParityRange; - int logsInUse; /* debugging */ - RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity - * logging disk work */ - RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding - * region log */ - RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding - * parity */ - caddr_t parityLogBufferHeap; /* pool of 
unused parity logs */ - RF_Thread_t pLogDiskThreadHandle; - -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - /* Point back to the softc for this device. This is needed to rid - * ourselves of the ugly static device arrays. - * XXX Will this affect compatibility with NetBSD? - */ - void *sc; -}; -#endif /* !_RF__RF_RAID_H_ */ diff --git a/sys/dev/raidframe/rf_raid0.c b/sys/dev/raidframe/rf_raid0.c deleted file mode 100644 index 5eefabb..0000000 --- a/sys/dev/raidframe/rf_raid0.c +++ /dev/null @@ -1,163 +0,0 @@ -/* $NetBSD: rf_raid0.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/*************************************** - * - * rf_raid0.c -- implements RAID Level 0 - * - ***************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid0.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_parityscan.h> - -typedef struct RF_Raid0ConfigInfo_s { - RF_RowCol_t *stripeIdentifier; -} RF_Raid0ConfigInfo_t; - -int -rf_ConfigureRAID0( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid0ConfigInfo_t *info; - RF_RowCol_t i; - - /* create a RAID level 0 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid0ConfigInfo_t), (RF_Raid0ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < raidPtr->numCol; i++) - info->stripeIdentifier[i] = i; - - RF_ASSERT(raidPtr->numRow == 1); - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->dataSectorsPerStripe = raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol; - layoutPtr->numParityCol = 0; - return (0); -} - -void -rf_MapSectorRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, 
- int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = SUID % raidPtr->numCol; - *diskSector = (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - *row = *col = 0; - *diskSector = 0; -} - -void -rf_IdentifyStripeRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_Raid0ConfigInfo_t *info; - - info = raidPtr->Layout.layoutSpecificInfo; - *diskids = info->stripeIdentifier; - *outRow = 0; -} - -void -rf_MapSIDToPSIDRAID0( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} - -void -rf_RAID0DagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - *createFunc = ((type == RF_IO_TYPE_READ) ? - (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRAID0WriteDAG); -} - -int -rf_VerifyParityRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) -{ - /* - * No parity is always okay. - */ - return (RF_PARITY_OKAY); -} diff --git a/sys/dev/raidframe/rf_raid0.h b/sys/dev/raidframe/rf_raid0.h deleted file mode 100644 index 36aae81..0000000 --- a/sys/dev/raidframe/rf_raid0.h +++ /dev/null @@ -1,58 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid0.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_raid0.h - header file for RAID Level 0 */ - -#ifndef _RF__RF_RAID0_H_ -#define _RF__RF_RAID0_H_ - -int -rf_ConfigureRAID0(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -void -rf_MapSectorRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID0(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID0DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t 
flags); - -#endif /* !_RF__RF_RAID0_H_ */ diff --git a/sys/dev/raidframe/rf_raid1.c b/sys/dev/raidframe/rf_raid1.c deleted file mode 100644 index 845e316..0000000 --- a/sys/dev/raidframe/rf_raid1.c +++ /dev/null @@ -1,691 +0,0 @@ -/* $NetBSD: rf_raid1.c,v 1.5 2000/01/08 22:57:30 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/***************************************************************************** - * - * rf_raid1.c -- implements RAID Level 1 - * - *****************************************************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid1.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_kintf.h> - -typedef struct RF_Raid1ConfigInfo_s { - RF_RowCol_t **stripeIdentifier; -} RF_Raid1ConfigInfo_t; -/* start of day code specific to RAID level 1 */ -int -rf_ConfigureRAID1( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid1ConfigInfo_t *info; - RF_RowCol_t i; - - /* create a RAID level 1 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* ... and fill it in. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < (raidPtr->numCol / 2); i++) { - info->stripeIdentifier[i][0] = (2 * i); - info->stripeIdentifier[i][1] = (2 * i) + 1; - } - - RF_ASSERT(raidPtr->numRow == 1); - - /* this implementation of RAID level 1 uses one row of numCol disks - * and allows multiple (numCol / 2) stripes per row. 
A stripe - * consists of a single data unit and a single parity (mirror) unit. - * stripe id = raidAddr / stripeUnitSize */ - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2); - layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->numParityCol = 1; - return (0); -} - - -/* returns the physical disk location of the primary copy in the mirror pair */ -void -rf_MapSectorRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); - - *row = 0; - *col = 2 * mirrorPair; - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* Map Parity - * - * returns the physical disk location of the secondary copy in the mirror - * pair - */ -void -rf_MapParityRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); - - *row = 0; - *col = (2 * mirrorPair) + 1; - - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* IdentifyStripeRAID1 - * - * returns a list of disks for a given redundancy group - */ -void -rf_IdentifyStripeRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = 
rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; - RF_ASSERT(stripeID >= 0); - RF_ASSERT(addr >= 0); - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)]; - RF_ASSERT(*diskids); -} - - -/* MapSIDToPSIDRAID1 - * - * maps a logical stripe to a stripe in the redundant array - */ -void -rf_MapSIDToPSIDRAID1( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} - - - -/****************************************************************************** - * select a graph to perform a single-stripe access - * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - name of function to use to create the graph - *****************************************************************************/ - -void -rf_RAID1DagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RowCol_t frow, fcol, or, oc; - RF_PhysDiskAddr_t *failedPDA; - int prior_recon; - RF_RowStatus_t rstat; - RF_SectorNum_t oo; - - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - if (asmap->numDataFailed + asmap->numParityFailed) { - /* - * We've got a fault. Re-map to spare space, iff applicable. - * Shouldn't the arch-independent code do this for us? - * Anyway, it turns out if we don't do this here, then when - * we're reconstructing, writes go only to the surviving - * original disk, and aren't reflected on the reconstructed - * spare. Oops. 
--jimz - */ - failedPDA = asmap->failedPDAs[0]; - frow = failedPDA->row; - fcol = failedPDA->col; - rstat = raidPtr->status[frow]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - or = frow; - oc = fcol; - oo = failedPDA->startSector; - /* - * If we did distributed sparing, we'd monkey with that here. - * But we don't, so we'll - */ - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - /* - * Redirect other components, iff necessary. This looks - * pretty suspicious to me, but it's what the raid5 - * DAG select does. - */ - if (asmap->parityInfo->next) { - if (failedPDA == asmap->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else { - if (failedPDA == asmap->parityInfo->next) { - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; - } - } - } - if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, - (long) oo, failedPDA->row, - failedPDA->col, - (long) failedPDA->startSector); - } - asmap->numDataFailed = asmap->numParityFailed = 0; - } - } - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; - } else { - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; - } -} - -int -rf_VerifyParityRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) -{ - int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs; - RF_DagNode_t *blockNode, *unblockNode, *wrBlock; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; - RF_AccessStripeMapHeader_t *asm_h; - RF_AllocListElem_t 
*allocList; - RF_AccTraceEntry_t tracerec; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_AccessStripeMap_t *aasm; - RF_SectorCount_t nsector; - RF_RaidAddr_t startAddr; - char *buf, *buf1, *buf2; - RF_PhysDiskAddr_t *pda; - RF_StripeNum_t psID; - RF_MCPair_t *mcpair; - - layoutPtr = &raidPtr->Layout; - startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - nsector = parityPDA->numSector; - nbytes = rf_RaidAddressToByte(raidPtr, nsector); - psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - - asm_h = NULL; - rd_dag_h = wr_dag_h = NULL; - mcpair = NULL; - - ret = RF_PARITY_COULD_NOT_VERIFY; - - rf_MakeAllocList(allocList); - if (allocList == NULL) - return (RF_PARITY_COULD_NOT_VERIFY); - mcpair = rf_AllocMCPair(); - if (mcpair == NULL) - goto done; - RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol); - stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol); - RF_MallocAndAdd(buf, bcount, (char *), allocList); - if (buf == NULL) - goto done; - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n", - raidPtr->raidid, (long) buf, bcount, (long) buf, - (long) buf + bcount); - } - /* - * Generate a DAG which will read the entire stripe- then we can - * just compare data chunks versus "parity" chunks. - */ - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf, - rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags, - RF_IO_NORMAL_PRIORITY); - if (rd_dag_h == NULL) - goto done; - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* - * Map the access to physical disk addresses (PDAs)- this will - * get us both a list of data addresses, and "parity" addresses - * (which are really mirror copies). 
- */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, - buf, RF_DONT_REMAP); - aasm = asm_h->stripeMap; - - buf1 = buf; - /* - * Loop through the data blocks, setting up read nodes for each. - */ - for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { - RF_ASSERT(pda); - - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { - /* cannot verify parity with dead disk */ - goto done; - } - pda->bufPtr = buf1; - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[1].p = buf1; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - buf1 += nbytes; - } - RF_ASSERT(pda == NULL); - /* - * keep i, buf1 running - * - * Loop through parity blocks, setting up read nodes for each. - */ - for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { - /* cannot verify parity with dead disk */ - goto done; - } - pda->bufPtr = buf1; - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[1].p = buf1; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - buf1 += nbytes; - } - RF_ASSERT(pda == NULL); - - bzero((char *) &tracerec, sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug > 1) { - printf("raid%d: RAID1 parity verify read dag:\n", - raidPtr->raidid); - rf_PrintDAGList(rd_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (mcpair->flag == 0) { - RF_WAIT_MCPAIR(mcpair); - } 
- RF_UNLOCK_MUTEX(mcpair->mutex); - - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n"); - ret = RF_PARITY_COULD_NOT_VERIFY; - goto done; - } - /* - * buf1 is the beginning of the data blocks chunk - * buf2 is the beginning of the parity blocks chunk - */ - buf1 = buf; - buf2 = buf + (nbytes * layoutPtr->numDataCol); - ret = RF_PARITY_OKAY; - /* - * bbufs is "bad bufs"- an array whose entries are the data - * column numbers where we had miscompares. (That is, column 0 - * and column 1 of the array are mirror copies, and are considered - * "data column 0" for this purpose). - */ - RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *), - allocList); - nbad = 0; - /* - * Check data vs "parity" (mirror copy). - */ - for (i = 0; i < layoutPtr->numDataCol; i++) { - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n", - raidPtr->raidid, nbytes, i, (long) buf1, - (long) buf2, (long) buf); - } - ret = bcmp(buf1, buf2, nbytes); - if (ret) { - if (rf_verifyParityDebug > 1) { - for (j = 0; j < nbytes; j++) { - if (buf1[j] != buf2[j]) - break; - } - printf("psid=%ld j=%d\n", (long) psID, j); - printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff, - buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff); - printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff, - buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff); - } - if (rf_verifyParityDebug) { - printf("raid%d: RAID1: found bad parity, i=%d\n", raidPtr->raidid, i); - } - /* - * Parity is bad. Keep track of which columns were bad. 
- */ - if (bbufs) - bbufs[nbad] = i; - nbad++; - ret = RF_PARITY_BAD; - } - buf1 += nbytes; - buf2 += nbytes; - } - - if ((ret != RF_PARITY_OKAY) && correct_it) { - ret = RF_PARITY_COULD_NOT_CORRECT; - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify: parity not correct\n", raidPtr->raidid); - } - if (bbufs == NULL) - goto done; - /* - * Make a DAG with one write node for each bad unit. We'll simply - * write the contents of the data unit onto the parity unit for - * correction. (It's possible that the mirror copy was the correct - * copy, and that we're spooging good data by writing bad over it, - * but there's no way we can know that. - */ - wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags, - RF_IO_NORMAL_PRIORITY); - if (wr_dag_h == NULL) - goto done; - wrBlock = wr_dag_h->succedents[0]; - /* - * Fill in a write node for each bad compare. - */ - for (i = 0; i < nbad; i++) { - j = i + layoutPtr->numDataCol; - pda = blockNode->succedents[j]->params[0].p; - pda->bufPtr = blockNode->succedents[i]->params[1].p; - wrBlock->succedents[i]->params[0].p = pda; - wrBlock->succedents[i]->params[1].p = pda->bufPtr; - wrBlock->succedents[i]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug > 1) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - /* fire off the write DAG */ - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) { - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - } - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n"); - goto done; - } - ret = RF_PARITY_CORRECTED; - } -done: - /* - * 
All done. We might've gotten here without doing part of the function, - * so cleanup what we have to and return our running status. - */ - if (asm_h) - rf_FreeAccessStripeMap(asm_h); - if (rd_dag_h) - rf_FreeDAG(rd_dag_h); - if (wr_dag_h) - rf_FreeDAG(wr_dag_h); - if (mcpair) - rf_FreeMCPair(mcpair); - rf_FreeAllocList(allocList); - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify, returning %d\n", - raidPtr->raidid, ret); - } - return (ret); -} - -int -rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ -{ - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconCtrl_t *reconCtrlPtr; - RF_RaidLayout_t *layoutPtr; - int retcode, created; - RF_CallbackDesc_t *cb, *p; - RF_ReconBuffer_t *t; - RF_Raid_t *raidPtr; - caddr_t ta; - - retcode = 0; - created = 0; - - raidPtr = rbuf->raidPtr; - layoutPtr = &raidPtr->Layout; - reconCtrlPtr = raidPtr->reconControl[rbuf->row]; - - RF_ASSERT(rbuf); - RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); - - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n", - raidPtr->raidid, rbuf->row, rbuf->col, - (long) rbuf->parityStripeID, rbuf->which_ru, - (long) rbuf->failedDiskSectorOffset); - } - if (rf_reconDebug) { - printf("RAID1 reconbuffer submit psid %ld buf %lx\n", - (long) rbuf->parityStripeID, (long) rbuf->buffer); - printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n", - (long) rbuf->parityStripeID, - rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3], - rbuf->buffer[4]); - } - RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, - rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - 
RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten - * an rbuf for it */ - - /* - * Since this is simple mirroring, the first submission for a stripe is also - * treated as the last. - */ - - t = NULL; - if (keep_it) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: keeping rbuf\n", - raidPtr->raidid); - } - t = rbuf; - } else { - if (use_committed) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: using committed rbuf\n", raidPtr->raidid); - } - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - t->next = NULL; - } else - if (reconCtrlPtr->floatingRbufs) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: using floating rbuf\n", raidPtr->raidid); - } - t = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = t->next; - t->next = NULL; - } - } - if (t == NULL) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: waiting for rbuf\n", raidPtr->raidid); - } - RF_ASSERT((keep_it == 0) && (use_committed == 0)); - raidPtr->procsInBufWait++; - if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1)) - && (raidPtr->numFullReconBuffers == 0)) { - /* ruh-ro */ - RF_ERRORMSG("Buffer wait deadlock\n"); - rf_PrintPSStatusTable(raidPtr, rbuf->row); - RF_PANIC(); - } - pssPtr->flags |= RF_PSS_BUFFERWAIT; - cb = rf_AllocCallbackDesc(); - cb->row = rbuf->row; - cb->col = rbuf->col; - cb->callbackArg.v = rbuf->parityStripeID; - cb->callbackArg2.v = rbuf->which_ru; - cb->next = NULL; - if (reconCtrlPtr->bufferWaitList == NULL) { - /* we are the wait list- lucky us */ - reconCtrlPtr->bufferWaitList = cb; - } else { - /* append to wait list */ - for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); - p->next = cb; - } - retcode = 1; - goto out; - } - if (t != rbuf) { - t->row = rbuf->row; - t->col = reconCtrlPtr->fcol; - t->parityStripeID = rbuf->parityStripeID; - t->which_ru = rbuf->which_ru; - t->failedDiskSectorOffset = 
rbuf->failedDiskSectorOffset; - t->spRow = rbuf->spRow; - t->spCol = rbuf->spCol; - t->spOffset = rbuf->spOffset; - /* Swap buffers. DANCE! */ - ta = t->buffer; - t->buffer = rbuf->buffer; - rbuf->buffer = ta; - } - /* - * Use the rbuf we've been given as the target. - */ - RF_ASSERT(pssPtr->rbuf == NULL); - pssPtr->rbuf = t; - - t->count = 1; - /* - * Below, we use 1 for numDataCol (which is equal to the count in the - * previous line), so we'll always be done. - */ - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1); - -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: returning %d\n", - raidPtr->raidid, retcode); - } - return (retcode); -} diff --git a/sys/dev/raidframe/rf_raid1.h b/sys/dev/raidframe/rf_raid1.h deleted file mode 100644 index 484cbcf..0000000 --- a/sys/dev/raidframe/rf_raid1.h +++ /dev/null @@ -1,63 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid1.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for RAID Level 1 */ - -#ifndef _RF__RF_RAID1_H_ -#define _RF__RF_RAID1_H_ - -#include <dev/raidframe/rf_types.h> - -int -rf_ConfigureRAID1(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -void -rf_MapSectorRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID1DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int -rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); - -#endif /* !_RF__RF_RAID1_H_ */ diff --git a/sys/dev/raidframe/rf_raid4.c b/sys/dev/raidframe/rf_raid4.c deleted file mode 100644 index d080319..0000000 --- a/sys/dev/raidframe/rf_raid4.c +++ /dev/null @@ -1,159 +0,0 @@ -/* $NetBSD: rf_raid4.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Rachad Youssef - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************** - * - * rf_raid4.c -- implements RAID Level 4 - * - ***************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_raid4.h> -#include <dev/raidframe/rf_general.h> - -typedef struct RF_Raid4ConfigInfo_s { - RF_RowCol_t *stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_Raid4ConfigInfo_t; - - - -int -rf_ConfigureRAID4( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid4ConfigInfo_t *info; - int i; - - /* create a RAID level 4 configuration structure ... 
*/ - RF_MallocAndAdd(info, sizeof(RF_Raid4ConfigInfo_t), (RF_Raid4ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* ... and fill it in. */ - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < raidPtr->numCol; i++) - info->stripeIdentifier[i] = i; - - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr) -{ - return (20); -} - -void -rf_MapSectorRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->Layout.numDataCol; - *diskSector = (SUID / 
(raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_IdentifyStripeRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_Raid4ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; - - *outRow = 0; - *diskids = info->stripeIdentifier; -} - -void -rf_MapSIDToPSIDRAID4( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} diff --git a/sys/dev/raidframe/rf_raid4.h b/sys/dev/raidframe/rf_raid4.h deleted file mode 100644 index 56df05a..0000000 --- a/sys/dev/raidframe/rf_raid4.h +++ /dev/null @@ -1,57 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid4.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Rachad Youssef - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* rf_raid4.h header file for RAID Level 4 */ - -#ifndef _RF__RF_RAID4_H_ -#define _RF__RF_RAID4_H_ - -int -rf_ConfigureRAID4(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID4(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID4DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_RAID4_H_ */ diff --git a/sys/dev/raidframe/rf_raid5.c b/sys/dev/raidframe/rf_raid5.c deleted file mode 100644 index 794e5a3..0000000 --- a/sys/dev/raidframe/rf_raid5.c +++ /dev/null @@ -1,322 +0,0 @@ -/* $NetBSD: rf_raid5.c,v 1.4 2000/01/08 22:57:30 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * - * rf_raid5.c -- implements RAID Level 5 - * - *****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid5.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_utils.h> - -typedef struct RF_Raid5ConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ -} RF_Raid5ConfigInfo_t; - -int -rf_ConfigureRAID5( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid5ConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - /* create a RAID level 5 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. 
*/ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - startdisk = 0; - for (i = 0; i < raidPtr->numCol; i++) { - for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((--startdisk) < 0) - startdisk = raidPtr->numCol - 1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr) -{ - return (10); -} -#if !defined(__NetBSD__) && !defined(__FreeBSD__) && !defined(_KERNEL) -/* not currently used */ -int -rf_ShutdownRAID5(RF_Raid_t * raidPtr) -{ - return (0); -} -#endif - -void -rf_MapSectorRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = (SUID % raidPtr->numCol); - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / 
raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->Layout.numDataCol - (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_IdentifyStripeRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; -} - -void -rf_MapSIDToPSIDRAID5( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -/* select an algorithm for performing an access. Returns two pointers, - * one to a function that will return information about the DAG, and - * another to a function that will create the dag. - */ -void -rf_RaidFiveDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA = NULL; - RF_RowCol_t frow, fcol; - RF_RowStatus_t rstat; - int prior_recon; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! 
Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } else - if (asmap->numDataFailed + asmap->numParityFailed == 1) { - - /* if under recon & already reconstructed, redirect - * the access to the spare drive and eliminate the - * failure indication */ - failedPDA = asmap->failedPDAs[0]; - frow = failedPDA->row; - fcol = failedPDA->col; - rstat = raidPtr->status[failedPDA->row]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; - RF_SectorNum_t oo = failedPDA->startSector; - - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist - * spare space */ - - if (failedPDA == asmap->parityInfo) { - - /* parity has failed */ - (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmap->parityInfo->next) { /* redir 2nd component, - * if any */ - RF_PhysDiskAddr_t *p = asmap->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; - p->row = failedPDA->row; - p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: - * startSector is not - * really a RAID address */ - } - } else - if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) { - RF_ASSERT(0); /* should not ever - * happen */ - } else { - - /* data has failed */ - (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - } - - } else { /* redirect to dedicated spare - * space */ - - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - - /* the parity may have two distinct - * components, both of which 
may need - * to be redirected */ - if (asmap->parityInfo->next) { - if (failedPDA == asmap->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else - if (failedPDA == asmap->parityInfo->next) { /* paranoid: should - * never occur */ - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; - } - } - } - - RF_ASSERT(failedPDA->col != -1); - - if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, - (long) oo, failedPDA->row, - failedPDA->col, - (long) failedPDA->startSector); - } - asmap->numDataFailed = asmap->numParityFailed = 0; - } - } - /* all dags begin/end with block/unblock node therefore, hdrSucc & - * termAnt counts should always be 1 also, these counts should not be - * visible outside dag creation routines - manipulating the counts - * here should be removed */ - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; - } else { - - - /* if mirroring, always use large writes. If the access - * requires two distinct parity updates, always do a small - * write. If the stripe contains a failure but the access - * does not, do a small write. The first conditional - * (numStripeUnitsAccessed <= numDataCol/2) uses a - * less-than-or-equal rather than just a less-than because - * when G is 3 or 4, numDataCol/2 is 1, and I want - * single-stripe-unit updates to use just one disk. 
*/ - if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - *createFunc = (RF_VoidFuncPtr) rf_CreateSmallWriteDAG; - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateLargeWriteDAG; - } else { - if (asmap->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; - else - if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; - } - } -} diff --git a/sys/dev/raidframe/rf_raid5.h b/sys/dev/raidframe/rf_raid5.h deleted file mode 100644 index 17549fe..0000000 --- a/sys/dev/raidframe/rf_raid5.h +++ /dev/null @@ -1,57 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid5.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_raid5.h - header file for RAID Level 5 */ - -#ifndef _RF__RF_RAID5_H_ -#define _RF__RF_RAID5_H_ - -int -rf_ConfigureRAID5(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID5(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RaidFiveDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_RAID5_H_ */ diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.c b/sys/dev/raidframe/rf_raid5_rotatedspare.c deleted file mode 100644 index f167a5f..0000000 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.c +++ /dev/null @@ -1,177 +0,0 @@ -/* $NetBSD: rf_raid5_rotatedspare.c,v 1.5 2001/01/26 05:16:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************** - * - * rf_raid5_rotated_spare.c -- implements RAID Level 5 with rotated sparing - * - **************************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_RAID5_RS > 0 - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid5.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_raid5_rotatedspare.h> - -typedef struct RF_Raid5RSConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_Raid5RSConfigInfo_t; - -int -rf_ConfigureRAID5_RS( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid5RSConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - /* create a RAID level 5 
configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5RSConfigInfo_t), (RF_Raid5RSConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - RF_ASSERT(raidPtr->numCol >= 3); - - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - startdisk = 0; - for (i = 0; i < raidPtr->numCol; i++) { - for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((--startdisk) < 0) - startdisk = raidPtr->numCol - 1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 2; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsRAID5_RS(raidPtr) - RF_Raid_t *raidPtr; -{ - return (raidPtr->Layout.stripeUnitsPerDisk / raidPtr->numCol); -} - -void -rf_MapSectorRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - if (remap) { - *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; 
- *col = (*col + 1) % raidPtr->numCol; /* spare unit is rotated - * with parity; line - * above maps to parity */ - } else { - *col = (SUID + (SUID / raidPtr->Layout.numDataCol)) % raidPtr->numCol; - } - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - if (remap) - *col = (*col + 1) % raidPtr->numCol; -} - -void -rf_IdentifyStripeRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5RSConfigInfo_t *info = (RF_Raid5RSConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; - -} - -void -rf_MapSIDToPSIDRAID5_RS( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -#endif /* RF_INCLUDE_RAID5_RS > 0 */ diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.h b/sys/dev/raidframe/rf_raid5_rotatedspare.h deleted file mode 100644 index 779150f..0000000 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.h +++ /dev/null @@ -1,53 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid5_rotatedspare.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_raid5_rotatedspare.h - header file for RAID Level 5 with rotated sparing */ - -#ifndef _RF__RF_RAID5_ROTATEDSPARE_H_ -#define _RF__RF_RAID5_ROTATEDSPARE_H_ - -int -rf_ConfigureRAID5_RS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); - -#endif /* !_RF__RF_RAID5_ROTATEDSPARE_H_ */ diff --git a/sys/dev/raidframe/rf_raidframe.h b/sys/dev/raidframe/rf_raidframe.h 
deleted file mode 100644 index fd711bd..0000000 --- a/sys/dev/raidframe/rf_raidframe.h +++ /dev/null @@ -1,162 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raidframe.h,v 1.11 2000/05/28 00:48:31 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************** - * - * rf_raidframe.h - * - * main header file for using raidframe in the kernel. 
- * - *****************************************************/ - - -#ifndef _RF__RF_RAIDFRAME_H_ -#define _RF__RF_RAIDFRAME_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_disks.h> -#include <dev/raidframe/rf_raid.h> - -typedef RF_uint32 RF_ReconReqFlags_t; - -struct rf_recon_req { /* used to tell the kernel to fail a disk */ - RF_RowCol_t row, col; - RF_ReconReqFlags_t flags; - void *raidPtr; /* used internally; need not be set at ioctl - * time */ - struct rf_recon_req *next; /* used internally; need not be set at - * ioctl time */ -}; - -struct RF_SparetWait_s { - int C, G, fcol; /* C = # disks in row, G = # units in stripe, - * fcol = which disk has failed */ - - RF_StripeCount_t SUsPerPU; /* this stuff is the info required to - * create a spare table */ - int TablesPerSpareRegion; - int BlocksPerTable; - RF_StripeCount_t TableDepthInPUs; - RF_StripeCount_t SpareSpaceDepthPerRegionInSUs; - - RF_SparetWait_t *next; /* used internally; need not be set at ioctl - * time */ -}; - -typedef struct RF_DeviceConfig_s { - u_int rows; - u_int cols; - u_int maxqdepth; - int ndevs; - RF_RaidDisk_t devs[RF_MAX_DISKS]; - int nspares; - RF_RaidDisk_t spares[RF_MAX_DISKS]; -} RF_DeviceConfig_t; - -typedef struct RF_ProgressInfo_s { - RF_uint64 remaining; - RF_uint64 completed; - RF_uint64 total; -} RF_ProgressInfo_t; - -/* flags that can be put in the rf_recon_req structure */ -#define RF_FDFLAGS_NONE 0x0 /* just fail the disk */ -#define RF_FDFLAGS_RECON 0x1 /* fail and initiate recon */ - -#define RAIDFRAME_CONFIGURE _IOW ('r', 1, void *) /* config an array */ -#if defined(__NetBSD__) -#define RAIDFRAME_SHUTDOWN _IO ('r', 2) /* shutdown the array */ -#elif defined(__FreeBSD__) -#define RAIDFRAME_SHUTDOWN _IOW ('r', 2, int) /* shutdown the array */ -#endif -#define RAIDFRAME_TUR _IOW ('r', 3, dev_t) /* debug only: test - * ready */ -#define RAIDFRAME_TEST_ACC _IOWR('r', 4, struct rf_test_acc) - /* run a test access 
*/ -#define RAIDFRAME_FAIL_DISK _IOW ('r', 5, struct rf_recon_req) - /* fail a disk & - * optionally start - * recon */ -#define RAIDFRAME_CHECK_RECON_STATUS _IOR('r', 6, int) /* get reconstruction % - * complete on indicated - * row */ -#define RAIDFRAME_REWRITEPARITY _IO ('r', 7) /* rewrite (initialize) - * all parity */ -#define RAIDFRAME_COPYBACK _IO ('r', 8) /* copy reconstructed - * data back to replaced - * disk */ -#define RAIDFRAME_SPARET_WAIT _IOR ('r', 9, RF_SparetWait_t) - /* does not return until - * kernel needs a spare - * table */ -#define RAIDFRAME_SEND_SPARET _IOW ('r', 10, void *) /* used to send a spare - * table down into the - * kernel */ -#define RAIDFRAME_ABORT_SPARET_WAIT _IO ('r', 11) /* used to wake up the - * sparemap daemon & - * tell it to exit */ -#define RAIDFRAME_START_ATRACE _IO ('r', 12) /* start tracing - * accesses */ -#define RAIDFRAME_STOP_ATRACE _IO ('r', 13) /* stop tracing - * accesses */ -#define RAIDFRAME_GET_SIZE _IOR ('r', 14, int) /* get size (# sectors) - * in raid device */ -#define RAIDFRAME_GET_INFO _IOWR ('r', 15, RF_DeviceConfig_t *) - /* get configuration */ -#define RAIDFRAME_RESET_ACCTOTALS _IO ('r', 16) /* reset AccTotals for - * device */ -#define RAIDFRAME_GET_ACCTOTALS _IOR ('r', 17, RF_AccTotals_t) - /* retrieve AccTotals - * for device */ -#define RAIDFRAME_KEEP_ACCTOTALS _IOW ('r', 18, int) /* turn AccTotals on or - * off for device */ -#define RAIDFRAME_GET_COMPONENT_LABEL _IOWR ('r', 19, RF_ComponentLabel_t) -#define RAIDFRAME_SET_COMPONENT_LABEL _IOW ('r', 20, RF_ComponentLabel_t) - -#define RAIDFRAME_INIT_LABELS _IOW ('r', 21, RF_ComponentLabel_t) -#define RAIDFRAME_ADD_HOT_SPARE _IOW ('r', 22, RF_SingleComponent_t) -#define RAIDFRAME_REMOVE_HOT_SPARE _IOW ('r', 23, RF_SingleComponent_t) -#define RAIDFRAME_REBUILD_IN_PLACE _IOW ('r', 24, RF_SingleComponent_t) -#define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int) -#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOR ('r', 26, int) -#define 
RAIDFRAME_CHECK_COPYBACK_STATUS _IOR ('r', 27, int) -#define RAIDFRAME_SET_AUTOCONFIG _IOWR ('r', 28, int) -#define RAIDFRAME_SET_ROOT _IOWR ('r', 29, int) -#define RAIDFRAME_DELETE_COMPONENT _IOW ('r', 30, RF_SingleComponent_t) -#define RAIDFRAME_INCORPORATE_HOT_SPARE _IOW ('r', 31, RF_SingleComponent_t) - -/* 'Extended' status versions */ -#define RAIDFRAME_CHECK_RECON_STATUS_EXT _IOR('r', 32, RF_ProgressInfo_t) -#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT _IOR ('r', 33, \ - RF_ProgressInfo_t) -#define RAIDFRAME_CHECK_COPYBACK_STATUS_EXT _IOR ('r', 34, RF_ProgressInfo_t) -#define RAIDFRAME_GET_UNIT _IOWR ('r', 35, int) - -#endif /* !_RF__RF_RAIDFRAME_H_ */ diff --git a/sys/dev/raidframe/rf_reconbuffer.c b/sys/dev/raidframe/rf_reconbuffer.c deleted file mode 100644 index 5831d5a..0000000 --- a/sys/dev/raidframe/rf_reconbuffer.c +++ /dev/null @@ -1,468 +0,0 @@ -/* $NetBSD: rf_reconbuffer.c,v 1.5 2001/01/27 20:10:49 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************************** - * - * rf_reconbuffer.c -- reconstruction buffer manager - * - ***************************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_reconutil.h> -#include <dev/raidframe/rf_nwayxor.h> - -#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) -#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) -#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) -#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) -#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) - -/***************************************************************************** - * - * Submit a reconstruction buffer to the manager for XOR. We can only - * submit a buffer if (1) we can xor into an existing buffer, which - * means we don't have to acquire a new one, (2) we can acquire a - * floating recon buffer, or (3) the caller has indicated that we are - * allowed to keep the submitted buffer. - * - * Returns non-zero if and only if we were not able to submit. - * In this case, we append the current disk ID to the wait list on the - * indicated RU, so that it will be re-enabled when we acquire a buffer - * for this RU. 
- * - ****************************************************************************/ - -/* - * nWayXorFuncs[i] is a pointer to a function that will xor "i" - * bufs into the accumulating sum. - */ -static RF_VoidFuncPtr nWayXorFuncs[] = { - NULL, - (RF_VoidFuncPtr) rf_nWayXor1, - (RF_VoidFuncPtr) rf_nWayXor2, - (RF_VoidFuncPtr) rf_nWayXor3, - (RF_VoidFuncPtr) rf_nWayXor4, - (RF_VoidFuncPtr) rf_nWayXor5, - (RF_VoidFuncPtr) rf_nWayXor6, - (RF_VoidFuncPtr) rf_nWayXor7, - (RF_VoidFuncPtr) rf_nWayXor8, - (RF_VoidFuncPtr) rf_nWayXor9 -}; - -int -rf_SubmitReconBuffer(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ -{ - RF_LayoutSW_t *lp; - int rc; - - lp = rbuf->raidPtr->Layout.map; - rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); - return (rc); -} - -int -rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ -{ - RF_Raid_t *raidPtr = rbuf->raidPtr; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row]; - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf - * pointers */ - caddr_t ta; /* temporary data buffer pointer */ - RF_CallbackDesc_t *cb, *p; - int retcode = 0, created = 0; - - RF_Etimer_t timer; - - /* makes no sense to have a submission from the failed disk */ - RF_ASSERT(rbuf); - RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); - - Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n", - rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) 
rbuf->failedDiskSectorOffset); - - RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten - * an rbuf for it */ - - /* check to see if enough buffers have accumulated to do an XOR. If - * so, there's no need to acquire a floating rbuf. Before we can do - * any XORing, we must have acquired a destination buffer. If we - * have, then we can go ahead and do the XOR if (1) including this - * buffer, enough bufs have accumulated, or (2) this is the last - * submission for this stripe. Otherwise, we have to go acquire a - * floating rbuf. */ - - targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - if ((targetRbuf != NULL) && - ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { - pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ - Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount); - RF_ETIMER_START(timer); - rf_MultiWayReconXor(raidPtr, pssPtr); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); - if (!keep_it) { - raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - - rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); - } - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); - - /* if use_committed is on, we _must_ consume a 
buffer off the - * committed list. */ - if (use_committed) { - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t); - } - if (keep_it) { - RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - rf_FreeReconBuffer(rbuf); - return (retcode); - } - goto out; - } - /* set the value of "t", which we'll use as the rbuf from here on */ - if (keep_it) { - t = rbuf; - } else { - if (use_committed) { /* if a buffer has been committed to - * us, use it */ - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - t->next = NULL; - } else - if (reconCtrlPtr->floatingRbufs) { - t = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = t->next; - t->next = NULL; - } - } - - /* If we weren't able to acquire a buffer, append to the end of the - * buf list in the recon ctrl struct. */ - if (!t) { - RF_ASSERT(!keep_it && !use_committed); - Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col); - - raidPtr->procsInBufWait++; - if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { - printf("Buffer wait deadlock detected. 
Exiting.\n"); - rf_PrintPSStatusTable(raidPtr, rbuf->row); - RF_PANIC(); - } - pssPtr->flags |= RF_PSS_BUFFERWAIT; - cb = rf_AllocCallbackDesc(); /* append to buf wait list in - * recon ctrl structure */ - cb->row = rbuf->row; - cb->col = rbuf->col; - cb->callbackArg.v = rbuf->parityStripeID; - cb->callbackArg2.v = rbuf->which_ru; - cb->next = NULL; - if (!reconCtrlPtr->bufferWaitList) - reconCtrlPtr->bufferWaitList = cb; - else { /* might want to maintain head/tail pointers - * here rather than search for end of list */ - for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); - p->next = cb; - } - retcode = 1; - goto out; - } - Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - - rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); - - /* initialize the buffer */ - if (t != rbuf) { - t->row = rbuf->row; - t->col = reconCtrlPtr->fcol; - t->parityStripeID = rbuf->parityStripeID; - t->which_ru = rbuf->which_ru; - t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; - t->spRow = rbuf->spRow; - t->spCol = rbuf->spCol; - t->spOffset = rbuf->spOffset; - - ta = t->buffer; - t->buffer = rbuf->buffer; - rbuf->buffer = ta; /* swap buffers */ - } - /* the first installation always gets installed as the destination - * buffer. 
subsequent installations get stacked up to allow for - * multi-way XOR */ - if (!pssPtr->rbuf) { - pssPtr->rbuf = t; - t->count = 1; - } else - pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ - - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if - * G=2 */ - -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - return (retcode); -} - -int -rf_MultiWayReconXor(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this - * parity stripe */ -{ - int i, numBufs = pssPtr->xorBufCount; - int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); - RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; - RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - - RF_ASSERT(pssPtr->rbuf != NULL); - RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); -#ifdef _KERNEL -#ifndef __NetBSD__ -#ifndef __FreeBSD__ - thread_block(); /* yield the processor before doing a big XOR */ -#endif -#endif -#endif /* _KERNEL */ - /* - * XXX - * - * What if more than 9 bufs? - */ - nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); - - /* release all the reconstruction buffers except the last one, which - * belongs to the disk whose submission caused this XOR to take place */ - for (i = 0; i < numBufs - 1; i++) { - if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) - rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); - else - if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) - rf_FreeReconBuffer(rbufs[i]); - else - RF_ASSERT(0); - } - targetRbuf->count += pssPtr->xorBufCount; - pssPtr->xorBufCount = 0; - return (0); -} -/* removes one full buffer from one of the full-buffer lists and returns it. - * - * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. 
- */ -RF_ReconBuffer_t * -rf_GetFullReconBuffer(reconCtrlPtr) - RF_ReconCtrl_t *reconCtrlPtr; -{ - RF_ReconBuffer_t *p; - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - if ((p = reconCtrlPtr->priorityList) != NULL) { - reconCtrlPtr->priorityList = p->next; - p->next = NULL; - goto out; - } - if ((p = reconCtrlPtr->fullBufferList) != NULL) { - reconCtrlPtr->fullBufferList = p->next; - p->next = NULL; - goto out; - } -out: - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - return (p); -} - - -/* if the reconstruction buffer is full, move it to the full list, - * which is maintained sorted by failed disk sector offset - * - * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */ -int -rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) - RF_Raid_t *raidPtr; - RF_ReconCtrl_t *reconCtrl; - RF_ReconParityStripeStatus_t *pssPtr; - int numDataCol; -{ - RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - - if (rbuf->count == numDataCol) { - raidPtr->numFullReconBuffers++; - Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { - Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - rbuf->next = reconCtrl->fullBufferList; - reconCtrl->fullBufferList = rbuf; - } else { - for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); - rbuf->next = p; - pt->next = rbuf; - Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - } -#if 0 - pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like - * to be able to find - * this rbuf while it's - * awaiting write */ -#else - rbuf->pssPtr = pssPtr; -#endif - pssPtr->rbuf = NULL; - rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); - } - return (0); -} 
- - -/* release a floating recon buffer for someone else to use. - * assumes the rb_mutex is LOCKED at entry - */ -void -rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconBuffer_t *rbuf; -{ - RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; - RF_CallbackDesc_t *cb; - - Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - - /* if anyone is waiting on buffers, wake one of them up. They will - * subsequently wake up anyone else waiting on their RU */ - if (rcPtr->bufferWaitList) { - rbuf->next = rcPtr->committedRbufs; - rcPtr->committedRbufs = rbuf; - cb = rcPtr->bufferWaitList; - rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've - * committed a buffer */ - rf_FreeCallbackDesc(cb); - raidPtr->procsInBufWait--; - } else { - rbuf->next = rcPtr->floatingRbufs; - rcPtr->floatingRbufs = rbuf; - } -} -/* release any disk that is waiting on a buffer for the indicated RU. - * assumes the rb_mutex is LOCKED at entry - */ -void -rf_ReleaseBufferWaiters(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; -{ - RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; - - Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", - (long) pssPtr->parityStripeID, pssPtr->which_ru); - pssPtr->flags &= ~RF_PSS_BUFFERWAIT; - while (cb) { - cb1 = cb->next; - cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't - * committed a buffer */ - rf_FreeCallbackDesc(cb); - cb = cb1; - } - pssPtr->bufWaitList = NULL; -} -/* when reconstruction is forced on an RU, there may be some disks waiting to - * acquire a buffer for that RU. 
Since we allocate a new buffer as part of - * the forced-reconstruction process, we no longer have to wait for any - * buffers, so we wakeup any waiter that we find in the bufferWaitList - * - * assumes the rb_mutex is LOCKED at entry - */ -void -rf_ReleaseBufferWaiter(rcPtr, rbuf) - RF_ReconCtrl_t *rcPtr; - RF_ReconBuffer_t *rbuf; -{ - RF_CallbackDesc_t *cb, *cbt; - - for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) { - if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) { - Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); - if (cbt) - cbt->next = cb->next; - else - rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no - * committed buffer */ - rf_FreeCallbackDesc(cb); - return; - } - } -} diff --git a/sys/dev/raidframe/rf_reconbuffer.h b/sys/dev/raidframe/rf_reconbuffer.h deleted file mode 100644 index 1a5407e..0000000 --- a/sys/dev/raidframe/rf_reconbuffer.h +++ /dev/null @@ -1,63 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconbuffer.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************* - * - * rf_reconbuffer.h -- header file for reconstruction buffer manager - * - *******************************************************************/ - -#ifndef _RF__RF_RECONBUFFER_H_ -#define _RF__RF_RECONBUFFER_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_reconstruct.h> - -int -rf_SubmitReconBuffer(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); -int -rf_SubmitReconBufferBasic(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); -int -rf_MultiWayReconXor(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr); -RF_ReconBuffer_t *rf_GetFullReconBuffer(RF_ReconCtrl_t * reconCtrlPtr); -int -rf_CheckForFullRbuf(RF_Raid_t * raidPtr, RF_ReconCtrl_t * reconCtrl, - RF_ReconParityStripeStatus_t * pssPtr, int numDataCol); -void -rf_ReleaseFloatingReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_ReconBuffer_t * rbuf); -void -rf_ReleaseBufferWaiters(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr); -void rf_ReleaseBufferWaiter(RF_ReconCtrl_t * rcPtr, RF_ReconBuffer_t * rbuf); - -#endif /* !_RF__RF_RECONBUFFER_H_ */ diff --git a/sys/dev/raidframe/rf_reconmap.c b/sys/dev/raidframe/rf_reconmap.c deleted file mode 100644 index 261d339..0000000 --- a/sys/dev/raidframe/rf_reconmap.c +++ /dev/null @@ -1,396 +0,0 @@ -/* $NetBSD: rf_reconmap.c,v 1.6 1999/08/14 21:44:24 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************* - * rf_reconmap.c - * - * code to maintain a map of what sectors have/have not been reconstructed - * - *************************************************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <sys/time.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> - -/* special pointer values indicating that a reconstruction unit - * has been either totally reconstructed or not at all. Both - * are illegal pointer values, so you have to be careful not to - * dereference through them. RU_NOTHING must be zero, since - * MakeReconMap uses bzero to initialize the structure. These are used - * only at the head of the list. 
- */ -#define RU_ALL ((RF_ReconMapListElem_t *) -1) -#define RU_NOTHING ((RF_ReconMapListElem_t *) 0) - -/* used to mark the end of the list */ -#define RU_NIL ((RF_ReconMapListElem_t *) 0) - - -static void -compact_stat_entry(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - int i); -static void crunch_list(RF_ReconMap_t * mapPtr, RF_ReconMapListElem_t * listPtr); -static RF_ReconMapListElem_t * -MakeReconMapListElem(RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, RF_ReconMapListElem_t * next); -static void -FreeReconMapListElem(RF_ReconMap_t * mapPtr, - RF_ReconMapListElem_t * p); -static void update_size(RF_ReconMap_t * mapPtr, int size); -static void PrintList(RF_ReconMapListElem_t * listPtr); - -/*----------------------------------------------------------------------------- - * - * Creates and initializes new Reconstruction map - * - *-----------------------------------------------------------------------------*/ - -RF_ReconMap_t * -rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk) - RF_Raid_t *raidPtr; - RF_SectorCount_t ru_sectors; /* size of reconstruction unit in - * sectors */ - RF_SectorCount_t disk_sectors; /* size of disk in sectors */ - RF_ReconUnitCount_t spareUnitsPerDisk; /* zero unless distributed - * sparing */ -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t num_rus = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerRU; - RF_ReconMap_t *p; - int rc; - - RF_Malloc(p, sizeof(RF_ReconMap_t), (RF_ReconMap_t *)); - p->sectorsPerReconUnit = ru_sectors; - p->sectorsInDisk = disk_sectors; - - p->totalRUs = num_rus; - p->spareRUs = spareUnitsPerDisk; - p->unitsLeft = num_rus - spareUnitsPerDisk; - - RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **)); - RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL); - - (void) bzero((char *) p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); - - p->size = sizeof(RF_ReconMap_t) + num_rus * sizeof(RF_ReconMapListElem_t 
*); - p->maxSize = p->size; - - rc = rf_mutex_init(&p->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_Free(p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); - RF_Free(p, sizeof(RF_ReconMap_t)); - return (NULL); - } - return (p); -} - - -/*----------------------------------------------------------------------------- - * - * marks a new set of sectors as reconstructed. All the possible mergings get - * complicated. To simplify matters, the approach I take is to just dump - * something into the list, and then clean it up (i.e. merge elements and - * eliminate redundant ones) in a second pass over the list (compact_stat_entry()). - * Not 100% efficient, since a structure can be allocated and then immediately - * freed, but it keeps this code from becoming (more of) a nightmare of - * special cases. The only thing that compact_stat_entry() assumes is that the - * list is sorted by startSector, and so this is the only condition I maintain - * here. 
(MCH) - * - *-----------------------------------------------------------------------------*/ - -void -rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; - RF_SectorNum_t stopSector; -{ - RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; - RF_SectorNum_t i, first_in_RU, last_in_RU; - RF_ReconMapListElem_t *p, *pt; - - RF_LOCK_MUTEX(mapPtr->mutex); - RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk && stopSector >= startSector); - - while (startSector <= stopSector) { - i = startSector / mapPtr->sectorsPerReconUnit; - first_in_RU = i * sectorsPerReconUnit; - last_in_RU = first_in_RU + sectorsPerReconUnit - 1; - p = mapPtr->status[i]; - if (p != RU_ALL) { - if (p == RU_NOTHING || p->startSector > startSector) { /* insert at front of - * list */ - - mapPtr->status[i] = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), (p == RU_NOTHING) ? NULL : p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); - - } else {/* general case */ - do { /* search for place to insert */ - pt = p; - p = p->next; - } while (p && (p->startSector < startSector)); - pt->next = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); - } - compact_stat_entry(raidPtr, mapPtr, i); - } - startSector = RF_MIN(stopSector, last_in_RU) + 1; - } - RF_UNLOCK_MUTEX(mapPtr->mutex); -} - - - -/*----------------------------------------------------------------------------- - * - * performs whatever list compactions can be done, and frees any space - * that is no longer necessary. Assumes only that the list is sorted - * by startSector. crunch_list() compacts a single list as much as possible, - * and the second block of code deletes the entire list if possible. - * crunch_list() is also called from MakeReconMapAccessList(). 
- * - * When a recon unit is detected to be fully reconstructed, we set the - * corresponding bit in the parity stripe map so that the head follow - * code will not select this parity stripe again. This is redundant (but - * harmless) when compact_stat_entry is called from the reconstruction code, - * but necessary when called from the user-write code. - * - *-----------------------------------------------------------------------------*/ - -static void -compact_stat_entry(raidPtr, mapPtr, i) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - int i; -{ - RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; - RF_ReconMapListElem_t *p = mapPtr->status[i]; - - crunch_list(mapPtr, p); - - if ((p->startSector == i * sectorsPerReconUnit) && - (p->stopSector == i * sectorsPerReconUnit + sectorsPerReconUnit - 1)) { - mapPtr->status[i] = RU_ALL; - mapPtr->unitsLeft--; - FreeReconMapListElem(mapPtr, p); - } -} - -static void -crunch_list(mapPtr, listPtr) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *listPtr; -{ - RF_ReconMapListElem_t *pt, *p = listPtr; - - if (!p) - return; - pt = p; - p = p->next; - while (p) { - if (pt->stopSector >= p->startSector - 1) { - pt->stopSector = RF_MAX(pt->stopSector, p->stopSector); - pt->next = p->next; - FreeReconMapListElem(mapPtr, p); - p = pt->next; - } else { - pt = p; - p = p->next; - } - } -} -/*----------------------------------------------------------------------------- - * - * Allocate and fill a new list element - * - *-----------------------------------------------------------------------------*/ - -static RF_ReconMapListElem_t * -MakeReconMapListElem( - RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, - RF_ReconMapListElem_t * next) -{ - RF_ReconMapListElem_t *p; - - RF_Malloc(p, sizeof(RF_ReconMapListElem_t), (RF_ReconMapListElem_t *)); - if (p == NULL) - return (NULL); - p->startSector = startSector; - p->stopSector = stopSector; - p->next = next; - return (p); -} 
-/*----------------------------------------------------------------------------- - * - * Free a list element - * - *-----------------------------------------------------------------------------*/ - -static void -FreeReconMapListElem(mapPtr, p) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *p; -{ - int delta; - - if (mapPtr) { - delta = 0 - (int) sizeof(RF_ReconMapListElem_t); - update_size(mapPtr, delta); - } - RF_Free(p, sizeof(*p)); -} -/*----------------------------------------------------------------------------- - * - * Free an entire status structure. Inefficient, but can be called at any time. - * - *-----------------------------------------------------------------------------*/ -void -rf_FreeReconMap(mapPtr) - RF_ReconMap_t *mapPtr; -{ - RF_ReconMapListElem_t *p, *q; - RF_ReconUnitCount_t numRUs; - RF_ReconUnitNum_t i; - - numRUs = mapPtr->sectorsInDisk / mapPtr->sectorsPerReconUnit; - if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) - numRUs++; - - for (i = 0; i < numRUs; i++) { - p = mapPtr->status[i]; - while (p != RU_NOTHING && p != RU_ALL) { - q = p; - p = p->next; - RF_Free(q, sizeof(*q)); - } - } - rf_mutex_destroy(&mapPtr->mutex); - RF_Free(mapPtr->status, mapPtr->totalRUs * sizeof(RF_ReconMapListElem_t *)); - RF_Free(mapPtr, sizeof(RF_ReconMap_t)); -} -/*----------------------------------------------------------------------------- - * - * returns nonzero if the indicated RU has been reconstructed already - * - *---------------------------------------------------------------------------*/ - -int -rf_CheckRUReconstructed(mapPtr, startSector) - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; -{ - RF_ReconMapListElem_t *l; /* used for searching */ - RF_ReconUnitNum_t i; - - i = startSector / mapPtr->sectorsPerReconUnit; - l = mapPtr->status[i]; - return ((l == RU_ALL) ? 
1 : 0); -} - -RF_ReconUnitCount_t -rf_UnitsLeftToReconstruct(mapPtr) - RF_ReconMap_t *mapPtr; -{ - RF_ASSERT(mapPtr != NULL); - return (mapPtr->unitsLeft); -} -/* updates the size fields of a status descriptor */ -static void -update_size(mapPtr, size) - RF_ReconMap_t *mapPtr; - int size; -{ - mapPtr->size += size; - mapPtr->maxSize = RF_MAX(mapPtr->size, mapPtr->maxSize); -} - -static void -PrintList(listPtr) - RF_ReconMapListElem_t *listPtr; -{ - while (listPtr) { - printf("%d,%d -> ", (int) listPtr->startSector, (int) listPtr->stopSector); - listPtr = listPtr->next; - } - printf("\n"); -} - -void -rf_PrintReconMap(raidPtr, mapPtr, frow, fcol) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; -{ - RF_ReconUnitCount_t numRUs; - RF_ReconMapListElem_t *p; - RF_ReconUnitNum_t i; - - numRUs = mapPtr->totalRUs; - if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) - numRUs++; - - for (i = 0; i < numRUs; i++) { - p = mapPtr->status[i]; - if (p == RU_ALL)/* printf("[%d] ALL\n",i) */ - ; - else - if (p == RU_NOTHING) { - printf("%d: Unreconstructed\n", i); - } else { - printf("%d: ", i); - PrintList(p); - } - } -} - -void -rf_PrintReconSchedule(mapPtr, starttime) - RF_ReconMap_t *mapPtr; - struct timeval *starttime; -{ - static int old_pctg = -1; - struct timeval tv, diff; - int new_pctg; - - new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); - if (new_pctg != old_pctg) { - RF_GETTIME(tv); - RF_TIMEVAL_DIFF(starttime, &tv, &diff); - printf("%d %d.%06d\n", (int) new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); - old_pctg = new_pctg; - } -} diff --git a/sys/dev/raidframe/rf_reconmap.h b/sys/dev/raidframe/rf_reconmap.h deleted file mode 100644 index 2fee059..0000000 --- a/sys/dev/raidframe/rf_reconmap.h +++ /dev/null @@ -1,86 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconmap.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * rf_reconMap.h -- Header file describing reconstruction status data structure - ******************************************************************************/ - -#ifndef _RF__RF_RECONMAP_H_ -#define _RF__RF_RECONMAP_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -/* - * Main reconstruction status descriptor. size and maxsize are used for - * monitoring only: they have no function for reconstruction. 
- */ -struct RF_ReconMap_s { - RF_SectorCount_t sectorsPerReconUnit; /* sectors per reconstruct - * unit */ - RF_SectorCount_t sectorsInDisk; /* total sectors in disk */ - RF_SectorCount_t unitsLeft; /* recon units left to recon */ - RF_ReconUnitCount_t totalRUs; /* total recon units on disk */ - RF_ReconUnitCount_t spareRUs; /* total number of spare RUs on failed - * disk */ - RF_StripeCount_t totalParityStripes; /* total number of parity - * stripes in array */ - u_int size; /* overall size of this structure */ - u_int maxSize; /* maximum size so far */ - RF_ReconMapListElem_t **status; /* array of ptrs to list elements */ - RF_DECLARE_MUTEX(mutex) -}; -/* a list element */ -struct RF_ReconMapListElem_s { - RF_SectorNum_t startSector; /* bounding sect nums on this block */ - RF_SectorNum_t stopSector; - RF_ReconMapListElem_t *next; /* next element in list */ -}; - -RF_ReconMap_t * -rf_MakeReconMap(RF_Raid_t * raidPtr, RF_SectorCount_t ru_sectors, - RF_SectorCount_t disk_sectors, RF_ReconUnitCount_t spareUnitsPerDisk); - -void -rf_ReconMapUpdate(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - RF_SectorNum_t startSector, RF_SectorNum_t stopSector); - -void rf_FreeReconMap(RF_ReconMap_t * mapPtr); - -int rf_CheckRUReconstructed(RF_ReconMap_t * mapPtr, RF_SectorNum_t startSector); - -RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(RF_ReconMap_t * mapPtr); - -void -rf_PrintReconMap(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - RF_RowCol_t frow, RF_RowCol_t fcol); - -void rf_PrintReconSchedule(RF_ReconMap_t * mapPtr, struct timeval * starttime); - -#endif /* !_RF__RF_RECONMAP_H_ */ diff --git a/sys/dev/raidframe/rf_reconstruct.c b/sys/dev/raidframe/rf_reconstruct.c deleted file mode 100644 index e24d440..0000000 --- a/sys/dev/raidframe/rf_reconstruct.c +++ /dev/null @@ -1,1682 +0,0 @@ -/* $NetBSD: rf_reconstruct.c,v 1.27 2001/01/26 02:16:24 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. 
- * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************ - * - * rf_reconstruct.c -- code to perform on-line reconstruction - * - ************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <sys/time.h> -#if defined(__FreeBSD__) -#include <sys/systm.h> -#if __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#endif -#include <sys/buf.h> -#include <sys/errno.h> - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_reconutil.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_desc.h> -#include 
<dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_shutdown.h> - -#include <dev/raidframe/rf_kintf.h> - -/* setting these to -1 causes them to be set to their default values if not set by debug options */ - -#define Dprintf(s) if (rf_reconDebug) rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#define Dprintf6(s,a,b,c,d,e,f) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL) -#define Dprintf7(s,a,b,c,d,e,f,g) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) - -#define DDprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define DDprintf2(s,a,b) if 
(rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) - -static RF_FreeList_t *rf_recond_freelist; -#define RF_MAX_FREE_RECOND 4 -#define RF_RECOND_INC 1 - -static RF_RaidReconDesc_t * -AllocRaidReconDesc(RF_Raid_t * raidPtr, - RF_RowCol_t row, RF_RowCol_t col, RF_RaidDisk_t * spareDiskPtr, - int numDisksDone, RF_RowCol_t srow, RF_RowCol_t scol); -static void FreeReconDesc(RF_RaidReconDesc_t * reconDesc); -static int -ProcessReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t frow, - RF_ReconEvent_t * event); -static int -IssueNextReadRequest(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); -static int TryToRead(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col); -static int -ComputePSDiskOffsets(RF_Raid_t * raidPtr, RF_StripeNum_t psid, - RF_RowCol_t row, RF_RowCol_t col, RF_SectorNum_t * outDiskOffset, - RF_SectorNum_t * outFailedDiskSectorOffset, RF_RowCol_t * spRow, - RF_RowCol_t * spCol, RF_SectorNum_t * spOffset); -static int IssueNextWriteRequest(RF_Raid_t * raidPtr, RF_RowCol_t row); -static int ReconReadDoneProc(void *arg, int status); -static int ReconWriteDoneProc(void *arg, int status); -static void -CheckForNewMinHeadSep(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_HeadSepLimit_t hsCtr); -static int -CheckHeadSeparation(RF_Raid_t * raidPtr, RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, RF_RowCol_t col, RF_HeadSepLimit_t hsCtr, - RF_ReconUnitNum_t which_ru); -static int -CheckForcedOrBlockedReconstruction(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr, RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, RF_RowCol_t col, RF_StripeNum_t psid, - RF_ReconUnitNum_t which_ru); -static void ForceReconReadDoneProc(void *arg, int status); - -static void rf_ShutdownReconstruction(void *); - -struct RF_ReconDoneProc_s { - void (*proc) (RF_Raid_t *, void *); - void *arg; - RF_ReconDoneProc_t *next; -}; - -static RF_FreeList_t *rf_rdp_freelist; -#define RF_MAX_FREE_RDP 4 -#define 
RF_RDP_INC 1 - -static void -SignalReconDone(RF_Raid_t * raidPtr) -{ - RF_ReconDoneProc_t *p; - - RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); - for (p = raidPtr->recon_done_procs; p; p = p->next) { - p->proc(raidPtr, p->arg); - } - RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); -} - -int -rf_RegisterReconDoneProc( - RF_Raid_t * raidPtr, - void (*proc) (RF_Raid_t *, void *), - void *arg, - RF_ReconDoneProc_t ** handlep) -{ - RF_ReconDoneProc_t *p; - - RF_FREELIST_GET(rf_rdp_freelist, p, next, (RF_ReconDoneProc_t *)); - if (p == NULL) - return (ENOMEM); - p->proc = proc; - p->arg = arg; - RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); - p->next = raidPtr->recon_done_procs; - raidPtr->recon_done_procs = p; - RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); - if (handlep) - *handlep = p; - return (0); -} -/************************************************************************** - * - * sets up the parameters that will be used by the reconstruction process - * currently there are none, except for those that the layout-specific - * configuration (e.g. rf_ConfigureDeclustered) routine sets up. - * - * in the kernel, we fire off the recon thread. 
- * - **************************************************************************/ -static void -rf_ShutdownReconstruction(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); - RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *)); -} - -int -rf_ConfigureReconstruction(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND, - RF_RECOND_INC, sizeof(RF_RaidReconDesc_t)); - if (rf_recond_freelist == NULL) - return (ENOMEM); - RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP, - RF_RDP_INC, sizeof(RF_ReconDoneProc_t)); - if (rf_rdp_freelist == NULL) { - RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); - return (ENOMEM); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownReconstruction(NULL); - return (rc); - } - return (0); -} - -static RF_RaidReconDesc_t * -AllocRaidReconDesc(raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - RF_RaidDisk_t *spareDiskPtr; - int numDisksDone; - RF_RowCol_t srow; - RF_RowCol_t scol; -{ - - RF_RaidReconDesc_t *reconDesc; - - RF_FREELIST_GET(rf_recond_freelist, reconDesc, next, (RF_RaidReconDesc_t *)); - - reconDesc->raidPtr = raidPtr; - reconDesc->row = row; - reconDesc->col = col; - reconDesc->spareDiskPtr = spareDiskPtr; - reconDesc->numDisksDone = numDisksDone; - reconDesc->srow = srow; - reconDesc->scol = scol; - reconDesc->state = 0; - reconDesc->next = NULL; - - return (reconDesc); -} - -static void -FreeReconDesc(reconDesc) - RF_RaidReconDesc_t *reconDesc; -{ -#if RF_RECON_STATS > 0 - printf("RAIDframe: %lu recon event waits, %lu recon delays\n", - (long) reconDesc->numReconEventWaits, (long) reconDesc->numReconExecDelays); -#endif /* RF_RECON_STATS > 0 */ - printf("RAIDframe: %lu 
max exec ticks\n", - (long) reconDesc->maxReconExecTicks); -#if (RF_RECON_STATS > 0) || defined(KERNEL) - printf("\n"); -#endif /* (RF_RECON_STATS > 0) || KERNEL */ - RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next); -} - - -/***************************************************************************** - * - * primary routine to reconstruct a failed disk. This should be called from - * within its own thread. It won't return until reconstruction completes, - * fails, or is aborted. - *****************************************************************************/ -int -rf_ReconstructFailedDisk(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_LayoutSW_t *lp; - int rc; - - lp = raidPtr->Layout.map; - if (lp->SubmitReconBuffer) { - /* - * The current infrastructure only supports reconstructing one - * disk at a time for each array. - */ - RF_LOCK_MUTEX(raidPtr->mutex); - while (raidPtr->reconInProgress) { - RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex); - } - raidPtr->reconInProgress++; - RF_UNLOCK_MUTEX(raidPtr->mutex); - rc = rf_ReconstructFailedDiskBasic(raidPtr, row, col); - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - } else { - RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", - lp->parityConfig); - rc = EIO; - } - RF_SIGNAL_COND(raidPtr->waitForReconCond); - wakeup(&raidPtr->waitForReconCond); /* XXX Methinks this will be - * needed at some point... 
GO */ - return (rc); -} - -int -rf_ReconstructFailedDiskBasic(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_ComponentLabel_t *c_label; - RF_RaidDisk_t *spareDiskPtr = NULL; - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t srow, scol; - int numDisksDone = 0, rc; - - RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *)); - if (c_label == NULL) { - printf("rf_ReconstructInPlace: Out of memory?\n"); - return (ENOMEM); - } - - /* first look for a spare drive onto which to reconstruct the data */ - /* spare disk descriptors are stored in row 0. This may have to - * change eventually */ - - RF_LOCK_MUTEX(raidPtr->mutex); - RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); - - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - if (raidPtr->status[row] != rf_rs_degraded) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because status not degraded\n", row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - srow = row; - scol = (-1); - } else { - srow = 0; - for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->numSpare; scol++) { - if (raidPtr->Disks[srow][scol].status == rf_ds_spare) { - spareDiskPtr = &raidPtr->Disks[srow][scol]; - spareDiskPtr->status = rf_ds_used_spare; - break; - } - } - if (!spareDiskPtr) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because no spares are available\n", row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (ENOSPC); - } - printf("RECON: initiating reconstruction on row %d col %d -> spare at row %d col %d\n", row, col, srow, scol); - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - - reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol); - raidPtr->reconDesc = (void *) reconDesc; -#if RF_RECON_STATS > 0 - reconDesc->hsStallCount = 0; - reconDesc->numReconExecDelays = 0; - 
reconDesc->numReconEventWaits = 0; -#endif /* RF_RECON_STATS > 0 */ - reconDesc->reconExecTimerRunning = 0; - reconDesc->reconExecTicks = 0; - reconDesc->maxReconExecTicks = 0; - rc = rf_ContinueReconstructFailedDisk(reconDesc); - - if (!rc) { - /* fix up the component label */ - /* Don't actually need the read here.. */ - raidread_component_label( - raidPtr->raid_cinfo[srow][scol].ci_dev, - raidPtr->raid_cinfo[srow][scol].ci_vp, - c_label); - - raid_init_component_label( raidPtr, c_label); - c_label->row = row; - c_label->column = col; - c_label->clean = RF_RAID_DIRTY; - c_label->status = rf_ds_optimal; - c_label->partitionSize = raidPtr->Disks[srow][scol].partitionSize; - - /* We've just done a rebuild based on all the other - disks, so at this point the parity is known to be - clean, even if it wasn't before. */ - - /* XXX doesn't hold for RAID 6!! */ - - raidPtr->parity_good = RF_RAID_CLEAN; - - /* XXXX MORE NEEDED HERE */ - - raidwrite_component_label( - raidPtr->raid_cinfo[srow][scol].ci_dev, - raidPtr->raid_cinfo[srow][scol].ci_vp, - c_label); - - } - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (rc); -} - -/* - - Allow reconstructing a disk in-place -- i.e. component /dev/sd2e goes AWOL, - and you don't get a spare until the next Monday. With this function - (and hot-swappable drives) you can now put your new disk containing - /dev/sd2e on the bus, scsictl it alive, and then use raidctl(8) to - rebuild the data "on the spot". 
- -*/ - -int -rf_ReconstructInPlace(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_RaidDisk_t *spareDiskPtr = NULL; - RF_RaidReconDesc_t *reconDesc; - RF_LayoutSW_t *lp; - RF_RaidDisk_t *badDisk; - RF_ComponentLabel_t *c_label; - int numDisksDone = 0, rc; - struct vnode *vp; - int retcode; - int ac; - - RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *)); - if (c_label == NULL) { - printf("rf_ReconstructInPlace: Out of memory?\n"); - return (ENOMEM); - } - - lp = raidPtr->Layout.map; - if (lp->SubmitReconBuffer) { - /* - * The current infrastructure only supports reconstructing one - * disk at a time for each array. - */ - RF_LOCK_MUTEX(raidPtr->mutex); - if ((raidPtr->Disks[row][col].status == rf_ds_optimal) && - (raidPtr->numFailures > 0)) { - /* XXX 0 above shouldn't be constant!!! */ - /* some component other than this has failed. - Let's not make things worse than they already - are... */ - printf("RAIDFRAME: Unable to reconstruct to disk at:\n"); - printf(" Row: %d Col: %d Too many failures.\n", - row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - if (raidPtr->Disks[row][col].status == rf_ds_reconstructing) { - printf("RAIDFRAME: Unable to reconstruct to disk at:\n"); - printf(" Row: %d Col: %d Reconstruction already occuring!\n", row, col); - - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - - - if (raidPtr->Disks[row][col].status != rf_ds_failed) { - /* "It's gone..." */ - raidPtr->numFailures++; - raidPtr->Disks[row][col].status = rf_ds_failed; - raidPtr->status[row] = rf_rs_degraded; - rf_update_component_labels(raidPtr, - RF_NORMAL_COMPONENT_UPDATE); - } - - while (raidPtr->reconInProgress) { - RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex); - } - - raidPtr->reconInProgress++; - - - /* first look for a spare drive onto which to reconstruct - the data. 
spare disk descriptors are stored in row 0. - This may have to change eventually */ - - /* Actually, we don't care if it's failed or not... - On a RAID set with correct parity, this function - should be callable on any component without ill affects. */ - /* RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); - */ - - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_ERRORMSG2("Unable to reconstruct to disk at row %d col %d: operation not supported for RF_DISTRIBUTE_SPARE\n", row, col); - - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - - /* XXX need goop here to see if the disk is alive, - and, if not, make it so... */ - - - - badDisk = &raidPtr->Disks[row][col]; - - /* This device may have been opened successfully the - first time. Close it before trying to open it again.. */ - - if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) { - printf("Closed the open device: %s\n", - raidPtr->Disks[row][col].devname); - vp = raidPtr->raid_cinfo[row][col].ci_vp; - ac = raidPtr->Disks[row][col].auto_configured; - rf_close_component(raidPtr, vp, ac); - raidPtr->raid_cinfo[row][col].ci_vp = NULL; - } - /* note that this disk was *not* auto_configured (any longer)*/ - raidPtr->Disks[row][col].auto_configured = 0; - - printf("About to (re-)open the device for rebuilding: %s\n", - raidPtr->Disks[row][col].devname); - - retcode = raid_getcomponentsize(raidPtr, row, col); - - if (retcode) { - printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n", - raidPtr->raidid, raidPtr->Disks[row][col].devname, - retcode); - - /* XXX the component isn't responding properly... 
- must be still dead :-( */ - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return(retcode); - - } - - spareDiskPtr = &raidPtr->Disks[row][col]; - spareDiskPtr->status = rf_ds_used_spare; - - printf("RECON: initiating in-place reconstruction on\n"); - printf(" row %d col %d -> spare at row %d col %d\n", - row, col, row, col); - - RF_UNLOCK_MUTEX(raidPtr->mutex); - - reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col, - spareDiskPtr, numDisksDone, - row, col); - raidPtr->reconDesc = (void *) reconDesc; -#if RF_RECON_STATS > 0 - reconDesc->hsStallCount = 0; - reconDesc->numReconExecDelays = 0; - reconDesc->numReconEventWaits = 0; -#endif /* RF_RECON_STATS > 0 */ - reconDesc->reconExecTimerRunning = 0; - reconDesc->reconExecTicks = 0; - reconDesc->maxReconExecTicks = 0; - rc = rf_ContinueReconstructFailedDisk(reconDesc); - - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - - } else { - RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", - lp->parityConfig); - rc = EIO; - } - RF_LOCK_MUTEX(raidPtr->mutex); - - if (!rc) { - /* Need to set these here, as at this point it'll be claiming - that the disk is in rf_ds_spared! But we know better :-) */ - - raidPtr->Disks[row][col].status = rf_ds_optimal; - raidPtr->status[row] = rf_rs_optimal; - - /* fix up the component label */ - /* Don't actually need the read here.. */ - raidread_component_label(raidPtr->raid_cinfo[row][col].ci_dev, - raidPtr->raid_cinfo[row][col].ci_vp, - c_label); - - raid_init_component_label(raidPtr, c_label); - - c_label->row = row; - c_label->column = col; - - /* We've just done a rebuild based on all the other - disks, so at this point the parity is known to be - clean, even if it wasn't before. */ - - /* XXX doesn't hold for RAID 6!! 
*/ - - raidPtr->parity_good = RF_RAID_CLEAN; - - raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev, - raidPtr->raid_cinfo[row][col].ci_vp, - c_label); - - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_SIGNAL_COND(raidPtr->waitForReconCond); - wakeup(&raidPtr->waitForReconCond); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (rc); -} - - -int -rf_ContinueReconstructFailedDisk(reconDesc) - RF_RaidReconDesc_t *reconDesc; -{ - RF_Raid_t *raidPtr = reconDesc->raidPtr; - RF_RowCol_t row = reconDesc->row; - RF_RowCol_t col = reconDesc->col; - RF_RowCol_t srow = reconDesc->srow; - RF_RowCol_t scol = reconDesc->scol; - RF_ReconMap_t *mapPtr; - - RF_ReconEvent_t *event; - struct timeval etime, elpsd; - unsigned long xor_s, xor_resid_us; - int retcode, i, ds; - - switch (reconDesc->state) { - - - case 0: - - raidPtr->accumXorTimeUs = 0; - - /* create one trace record per physical disk */ - RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - - /* quiesce the array prior to starting recon. this is needed - * to assure no nasty interactions with pending user writes. - * We need to do this before we change the disk or row status. 
*/ - reconDesc->state = 1; - - Dprintf("RECON: begin request suspend\n"); - retcode = rf_SuspendNewRequestsAndWait(raidPtr); - Dprintf("RECON: end request suspend\n"); - rf_StartUserStats(raidPtr); /* zero out the stats kept on - * user accs */ - - /* fall through to state 1 */ - - case 1: - - RF_LOCK_MUTEX(raidPtr->mutex); - - /* create the reconstruction control pointer and install it in - * the right slot */ - raidPtr->reconControl[row] = rf_MakeReconControl(reconDesc, row, col, srow, scol); - mapPtr = raidPtr->reconControl[row]->reconMap; - raidPtr->status[row] = rf_rs_reconstructing; - raidPtr->Disks[row][col].status = rf_ds_reconstructing; - raidPtr->Disks[row][col].spareRow = srow; - raidPtr->Disks[row][col].spareCol = scol; - - RF_UNLOCK_MUTEX(raidPtr->mutex); - - RF_GETTIME(raidPtr->reconControl[row]->starttime); - - /* now start up the actual reconstruction: issue a read for - * each surviving disk */ - - reconDesc->numDisksDone = 0; - for (i = 0; i < raidPtr->numCol; i++) { - if (i != col) { - /* find and issue the next I/O on the - * indicated disk */ - if (IssueNextReadRequest(raidPtr, row, i)) { - Dprintf2("RECON: done issuing for r%d c%d\n", row, i); - reconDesc->numDisksDone++; - } - } - } - - case 2: - Dprintf("RECON: resume requests\n"); - rf_ResumeNewRequests(raidPtr); - - - reconDesc->state = 3; - - case 3: - - /* process reconstruction events until all disks report that - * they've completed all work */ - mapPtr = raidPtr->reconControl[row]->reconMap; - - - - while (reconDesc->numDisksDone < raidPtr->numCol - 1) { - - event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); - RF_ASSERT(event); - - if (ProcessReconEvent(raidPtr, row, event)) - reconDesc->numDisksDone++; - raidPtr->reconControl[row]->numRUsTotal = - mapPtr->totalRUs; - raidPtr->reconControl[row]->numRUsComplete = - mapPtr->totalRUs - - rf_UnitsLeftToReconstruct(mapPtr); - - raidPtr->reconControl[row]->percentComplete = - 
(raidPtr->reconControl[row]->numRUsComplete * 100 / raidPtr->reconControl[row]->numRUsTotal); - if (rf_prReconSched) { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); - } - } - - - - reconDesc->state = 4; - - - case 4: - mapPtr = raidPtr->reconControl[row]->reconMap; - if (rf_reconDebug) { - printf("RECON: all reads completed\n"); - } - /* at this point all the reads have completed. We now wait - * for any pending writes to complete, and then we're done */ - - while (rf_UnitsLeftToReconstruct(raidPtr->reconControl[row]->reconMap) > 0) { - - event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); - RF_ASSERT(event); - - (void) ProcessReconEvent(raidPtr, row, event); /* ignore return code */ - raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); - if (rf_prReconSched) { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); - } - } - reconDesc->state = 5; - - case 5: - /* Success: mark the dead disk as reconstructed. We quiesce - * the array here to assure no nasty interactions with pending - * user accesses when we free up the psstatus structure as - * part of FreeReconControl() */ - - reconDesc->state = 6; - - retcode = rf_SuspendNewRequestsAndWait(raidPtr); - rf_StopUserStats(raidPtr); - rf_PrintUserStats(raidPtr); /* print out the stats on user - * accs accumulated during - * recon */ - - /* fall through to state 6 */ - case 6: - - - - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures--; - ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE); - raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared; - raidPtr->status[row] = (ds) ? 
rf_rs_reconfigured : rf_rs_optimal; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_GETTIME(etime); - RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), &etime, &elpsd); - - /* XXX -- why is state 7 different from state 6 if there is no - * return() here? -- XXX Note that I set elpsd above & use it - * below, so if you put a return here you'll have to fix this. - * (also, FreeReconControl is called below) */ - - case 7: - - rf_ResumeNewRequests(raidPtr); - - printf("Reconstruction of disk at row %d col %d completed\n", - row, col); - xor_s = raidPtr->accumXorTimeUs / 1000000; - xor_resid_us = raidPtr->accumXorTimeUs % 1000000; - printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n", - (int) elpsd.tv_sec, (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s, xor_resid_us); - printf(" (start time %d sec %d usec, end time %d sec %d usec)\n", - (int) raidPtr->reconControl[row]->starttime.tv_sec, - (int) raidPtr->reconControl[row]->starttime.tv_usec, - (int) etime.tv_sec, (int) etime.tv_usec); - -#if RF_RECON_STATS > 0 - printf("Total head-sep stall count was %d\n", - (int) reconDesc->hsStallCount); -#endif /* RF_RECON_STATS > 0 */ - rf_FreeReconControl(raidPtr, row); - RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t)); - FreeReconDesc(reconDesc); - - } - - SignalReconDone(raidPtr); - return (0); -} -/***************************************************************************** - * do the right thing upon each reconstruction event. 
- * returns nonzero if and only if there is nothing left unread on the - * indicated disk - *****************************************************************************/ -static int -ProcessReconEvent(raidPtr, frow, event) - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_ReconEvent_t *event; -{ - int retcode = 0, submitblocked; - RF_ReconBuffer_t *rbuf; - RF_SectorCount_t sectorsPerRU; - - Dprintf1("RECON: ProcessReconEvent type %d\n", event->type); - switch (event->type) { - - /* a read I/O has completed */ - case RF_REVENT_READDONE: - rbuf = raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf; - Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld\n", - frow, event->col, rbuf->parityStripeID); - Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, - rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0); - Dprintf1("RECON: submitblocked=%d\n", submitblocked); - if (!submitblocked) - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - /* a write I/O has completed */ - case RF_REVENT_WRITEDONE: - if (rf_floatingRbufDebug) { - rf_CheckFloatingRbufCount(raidPtr, 1); - } - sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; - rbuf = (RF_ReconBuffer_t *) event->arg; - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d (%d %% complete)\n", - rbuf->parityStripeID, rbuf->which_ru, raidPtr->reconControl[frow]->percentComplete); - rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]->reconMap, - rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU - 1); - rf_RemoveFromActiveReconTable(raidPtr, frow, rbuf->parityStripeID, rbuf->which_ru); - - if (rbuf->type == RF_RBUF_TYPE_FLOATING) { - 
RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - raidPtr->numFullReconBuffers--; - rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf); - RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - } else - if (rbuf->type == RF_RBUF_TYPE_FORCED) - rf_FreeReconBuffer(rbuf); - else - RF_ASSERT(0); - break; - - case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been - * cleared */ - Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d\n", frow, event->col); - submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, (int) (long) event->arg); - RF_ASSERT(!submitblocked); /* we wouldn't have gotten the - * BUFCLEAR event if we - * couldn't submit */ - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - case RF_REVENT_BLOCKCLEAR: /* A user-write reconstruction - * blockage has been cleared */ - DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d\n", frow, event->col); - retcode = TryToRead(raidPtr, frow, event->col); - break; - - case RF_REVENT_HEADSEPCLEAR: /* A max-head-separation - * reconstruction blockage has been - * cleared */ - Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d\n", frow, event->col); - retcode = TryToRead(raidPtr, frow, event->col); - break; - - /* a buffer has become ready to write */ - case RF_REVENT_BUFREADY: - Dprintf2("RECON: BUFREADY EVENT: row %d col %d\n", frow, event->col); - retcode = IssueNextWriteRequest(raidPtr, frow); - if (rf_floatingRbufDebug) { - rf_CheckFloatingRbufCount(raidPtr, 1); - } - break; - - /* we need to skip the current RU entirely because it got - * recon'd while we were waiting for something else to happen */ - case RF_REVENT_SKIP: - DDprintf2("RECON: SKIP EVENT: row %d col %d\n", frow, event->col); - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - /* a forced-reconstruction read access has completed. 
Just - * submit the buffer */ - case RF_REVENT_FORCEDREADDONE: - rbuf = (RF_ReconBuffer_t *) event->arg; - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d\n", frow, event->col); - submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0); - RF_ASSERT(!submitblocked); - break; - - default: - RF_PANIC(); - } - rf_FreeReconEventDesc(event); - return (retcode); -} -/***************************************************************************** - * - * find the next thing that's needed on the indicated disk, and issue - * a read request for it. We assume that the reconstruction buffer - * associated with this process is free to receive the data. If - * reconstruction is blocked on the indicated RU, we issue a - * blockage-release request instead of a physical disk read request. - * If the current disk gets too far ahead of the others, we issue a - * head-separation wait request and return. - * - * ctrl->{ru_count, curPSID, diskOffset} and - * rbuf->failedDiskSectorOffset are maintained to point to the unit - * we're currently accessing. Note that this deviates from the - * standard C idiom of having counters point to the next thing to be - * accessed. This allows us to easily retry when we're blocked by - * head separation or reconstruction-blockage events. 
- * - * returns nonzero if and only if there is nothing left unread on the - * indicated disk - * - *****************************************************************************/ -static int -IssueNextReadRequest(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconBuffer_t *rbuf = ctrl->rbuf; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; - int do_new_check = 0, retcode = 0, status; - - /* if we are currently the slowest disk, mark that we have to do a new - * check */ - if (ctrl->headSepCounter <= raidPtr->reconControl[row]->minHeadSepCounter) - do_new_check = 1; - - while (1) { - - ctrl->ru_count++; - if (ctrl->ru_count < RUsPerPU) { - ctrl->diskOffset += sectorsPerRU; - rbuf->failedDiskSectorOffset += sectorsPerRU; - } else { - ctrl->curPSID++; - ctrl->ru_count = 0; - /* code left over from when head-sep was based on - * parity stripe id */ - if (ctrl->curPSID >= raidPtr->reconControl[row]->lastPSID) { - CheckForNewMinHeadSep(raidPtr, row, ++(ctrl->headSepCounter)); - return (1); /* finito! */ - } - /* find the disk offsets of the start of the parity - * stripe on both the current disk and the failed - * disk. 
skip this entire parity stripe if either disk - * does not appear in the indicated PS */ - status = ComputePSDiskOffsets(raidPtr, ctrl->curPSID, row, col, &ctrl->diskOffset, &rbuf->failedDiskSectorOffset, - &rbuf->spRow, &rbuf->spCol, &rbuf->spOffset); - if (status) { - ctrl->ru_count = RUsPerPU - 1; - continue; - } - } - rbuf->which_ru = ctrl->ru_count; - - /* skip this RU if it's already been reconstructed */ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, rbuf->failedDiskSectorOffset)) { - Dprintf2("Skipping psid %ld ru %d: already reconstructed\n", ctrl->curPSID, ctrl->ru_count); - continue; - } - break; - } - ctrl->headSepCounter++; - if (do_new_check) - CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); /* update min if needed */ - - - /* at this point, we have definitely decided what to do, and we have - * only to see if we can actually do it now */ - rbuf->parityStripeID = ctrl->curPSID; - rbuf->which_ru = ctrl->ru_count; - bzero((char *) &raidPtr->recon_tracerecs[col], sizeof(raidPtr->recon_tracerecs[col])); - raidPtr->recon_tracerecs[col].reconacc = 1; - RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); - retcode = TryToRead(raidPtr, row, col); - return (retcode); -} - -/* - * tries to issue the next read on the indicated disk. We may be - * blocked by (a) the heads being too far apart, or (b) recon on the - * indicated RU being blocked due to a write by a user thread. In - * this case, we issue a head-sep or blockage wait request, which will - * cause this same routine to be invoked again later when the blockage - * has cleared. 
- */ - -static int -TryToRead(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; - RF_StripeNum_t psid = ctrl->curPSID; - RF_ReconUnitNum_t which_ru = ctrl->ru_count; - RF_DiskQueueData_t *req; - int status, created = 0; - RF_ReconParityStripeStatus_t *pssPtr; - - /* if the current disk is too far ahead of the others, issue a - * head-separation wait and return */ - if (CheckHeadSeparation(raidPtr, ctrl, row, col, ctrl->headSepCounter, which_ru)) - return (0); - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE, &created); - - /* if recon is blocked on the indicated parity stripe, issue a - * block-wait request and return. this also must mark the indicated RU - * in the stripe as under reconstruction if not blocked. */ - status = CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl, row, col, psid, which_ru); - if (status == RF_PSS_RECON_BLOCKED) { - Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked\n", psid, which_ru); - goto out; - } else - if (status == RF_PSS_FORCED_ON_WRITE) { - rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); - goto out; - } - /* make one last check to be sure that the indicated RU didn't get - * reconstructed while we were waiting for something else to happen. - * This is unfortunate in that it causes us to make this check twice - * in the normal case. Might want to make some attempt to re-work - * this so that we only do this check if we've definitely blocked on - * one of the above checks. When this condition is detected, we may - * have just created a bogus status entry, which we need to delete. 
*/ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, ctrl->rbuf->failedDiskSectorOffset)) { - Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after stall\n", psid, which_ru); - if (created) - rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); - rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); - goto out; - } - /* found something to read. issue the I/O */ - Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld buf %lx\n", - psid, row, col, ctrl->diskOffset, ctrl->rbuf->buffer); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer); - raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us = - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); - - /* should be ok to use a NULL proc pointer here, all the bufs we use - * should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset, sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru, - ReconReadDoneProc, (void *) ctrl, NULL, &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- XXX */ - - ctrl->rbuf->arg = (void *) req; - rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY); - pssPtr->issued[col] = 1; - -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (0); -} - - -/* - * given a parity stripe ID, we want to find out whether both the - * current disk and the failed disk exist in that parity stripe. If - * not, we want to skip this whole PS. If so, we want to find the - * disk offset of the start of the PS on both the current disk and the - * failed disk. - * - * this works by getting a list of disks comprising the indicated - * parity stripe, and searching the list for the current and failed - * disks. 
Once we've decided they both exist in the parity stripe, we - * need to decide whether each is data or parity, so that we'll know - * which mapping function to call to get the corresponding disk - * offsets. - * - * this is kind of unpleasant, but doing it this way allows the - * reconstruction code to use parity stripe IDs rather than physical - * disks address to march through the failed disk, which greatly - * simplifies a lot of code, as well as eliminating the need for a - * reverse-mapping function. I also think it will execute faster, - * since the calls to the mapping module are kept to a minimum. - * - * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING - * THE STRIPE IN THE CORRECT ORDER */ - - -static int -ComputePSDiskOffsets( - RF_Raid_t * raidPtr, /* raid descriptor */ - RF_StripeNum_t psid, /* parity stripe identifier */ - RF_RowCol_t row, /* row and column of disk to find the offsets - * for */ - RF_RowCol_t col, - RF_SectorNum_t * outDiskOffset, - RF_SectorNum_t * outFailedDiskSectorOffset, - RF_RowCol_t * spRow, /* OUT: row,col of spare unit for failed unit */ - RF_RowCol_t * spCol, - RF_SectorNum_t * spOffset) -{ /* OUT: offset into disk containing spare unit */ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; - RF_RaidAddr_t sosRaidAddress; /* start-of-stripe */ - RF_RowCol_t *diskids; - u_int i, j, k, i_offset, j_offset; - RF_RowCol_t prow, pcol; - int testcol, testrow; - RF_RowCol_t stripe; - RF_SectorNum_t poffset; - char i_is_parity = 0, j_is_parity = 0; - RF_RowCol_t stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - - /* get a listing of the disks comprising that stripe */ - sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid); - (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids, &stripe); - RF_ASSERT(diskids); - - /* reject this entire parity stripe if it does not contain the - * indicated disk or it does not contain the 
failed disk */ - if (row != stripe) - goto skipit; - for (i = 0; i < stripeWidth; i++) { - if (col == diskids[i]) - break; - } - if (i == stripeWidth) - goto skipit; - for (j = 0; j < stripeWidth; j++) { - if (fcol == diskids[j]) - break; - } - if (j == stripeWidth) { - goto skipit; - } - /* find out which disk the parity is on */ - (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol, &poffset, RF_DONT_REMAP); - - /* find out if either the current RU or the failed RU is parity */ - /* also, if the parity occurs in this stripe prior to the data and/or - * failed col, we need to decrement i and/or j */ - for (k = 0; k < stripeWidth; k++) - if (diskids[k] == pcol) - break; - RF_ASSERT(k < stripeWidth); - i_offset = i; - j_offset = j; - if (k < i) - i_offset--; - else - if (k == i) { - i_is_parity = 1; - i_offset = 0; - } /* set offsets to zero to disable multiply - * below */ - if (k < j) - j_offset--; - else - if (k == j) { - j_is_parity = 1; - j_offset = 0; - } - /* at this point, [ij]_is_parity tells us whether the [current,failed] - * disk is parity at the start of this RU, and, if data, "[ij]_offset" - * tells us how far into the stripe the [current,failed] disk is. */ - - /* call the mapping routine to get the offset into the current disk, - * repeat for failed disk. 
*/ - if (i_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); - - RF_ASSERT(row == testrow && col == testcol); - - if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); - RF_ASSERT(row == testrow && fcol == testcol); - - /* now locate the spare unit for the failed unit */ - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { - if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); - } else { - *spRow = raidPtr->reconControl[row]->spareRow; - *spCol = raidPtr->reconControl[row]->spareCol; - *spOffset = *outFailedDiskSectorOffset; - } - - return (0); - -skipit: - Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d\n", - psid, row, col); - return (1); -} -/* this is called when a buffer has become ready to write to the replacement disk */ -static int -IssueNextWriteRequest(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; - RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; - RF_ReconBuffer_t *rbuf; - RF_DiskQueueData_t *req; - - rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]); - RF_ASSERT(rbuf); /* there must be one 
available, or we wouldn't - * have gotten the event that sent us here */ - RF_ASSERT(rbuf->pssPtr); - - rbuf->pssPtr->writeRbuf = rbuf; - rbuf->pssPtr = NULL; - - Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d (failed disk offset %ld) buf %lx\n", - rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID, - rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer); - Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, - rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); - - /* should be ok to use a NULL b_proc here b/c all addrs should be in - * kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset, - sectorsPerRU, rbuf->buffer, - rbuf->parityStripeID, rbuf->which_ru, - ReconWriteDoneProc, (void *) rbuf, NULL, - &raidPtr->recon_tracerecs[fcol], - (void *) raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- XXX */ - - rbuf->arg = (void *) req; - rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, RF_IO_RECON_PRIORITY); - - return (0); -} - -/* - * this gets called upon the completion of a reconstruction read - * operation the arg is a pointer to the per-disk reconstruction - * control structure for the process that just finished a read. - * - * called at interrupt context in the kernel, so don't do anything - * illegal here. 
- */ -static int -ReconReadDoneProc(arg, status) - void *arg; - int status; -{ - RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg; - RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr; - - if (status) { - /* - * XXX - */ - printf("Recon read failed!\n"); - RF_PANIC(); - } - RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - raidPtr->recon_tracerecs[ctrl->col].specific.recon.recon_fetch_to_return_us = - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - - rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, RF_REVENT_READDONE); - return (0); -} -/* this gets called upon the completion of a reconstruction write operation. - * the arg is a pointer to the rbuf that was just written - * - * called at interrupt context in the kernel, so don't do anything illegal here. - */ -static int -ReconWriteDoneProc(arg, status) - void *arg; - int status; -{ - RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg; - - Dprintf2("Reconstruction completed on psid %ld ru %d\n", rbuf->parityStripeID, rbuf->which_ru); - if (status) { - printf("Recon write failed!\n"); /* fprintf(stderr,"Recon - * write failed!\n"); */ - RF_PANIC(); - } - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, arg, RF_REVENT_WRITEDONE); - return (0); -} - - -/* - * computes a new minimum head sep, and wakes up anyone who needs to - * be woken as a result - */ -static void -CheckForNewMinHeadSep(raidPtr, row, hsCtr) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_HeadSepLimit_t hsCtr; -{ - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_HeadSepLimit_t new_min; - RF_RowCol_t i; - RF_CallbackDesc_t *p; - RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter); /* from the definition - * of a minimum */ - - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - new_min = ~(1L << (8 * sizeof(long) - 1)); /* 
0x7FFF....FFF */ - for (i = 0; i < raidPtr->numCol; i++) - if (i != reconCtrlPtr->fcol) { - if (reconCtrlPtr->perDiskInfo[i].headSepCounter < new_min) - new_min = reconCtrlPtr->perDiskInfo[i].headSepCounter; - } - /* set the new minimum and wake up anyone who can now run again */ - if (new_min != reconCtrlPtr->minHeadSepCounter) { - reconCtrlPtr->minHeadSepCounter = new_min; - Dprintf1("RECON: new min head pos counter val is %ld\n", new_min); - while (reconCtrlPtr->headSepCBList) { - if (reconCtrlPtr->headSepCBList->callbackArg.v > new_min) - break; - p = reconCtrlPtr->headSepCBList; - reconCtrlPtr->headSepCBList = p->next; - p->next = NULL; - rf_CauseReconEvent(raidPtr, p->row, p->col, NULL, RF_REVENT_HEADSEPCLEAR); - rf_FreeCallbackDesc(p); - } - - } - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); -} - -/* - * checks to see that the maximum head separation will not be violated - * if we initiate a reconstruction I/O on the indicated disk. - * Limiting the maximum head separation between two disks eliminates - * the nasty buffer-stall conditions that occur when one disk races - * ahead of the others and consumes all of the floating recon buffers. - * This code is complex and unpleasant but it's necessary to avoid - * some very nasty, albeit fairly rare, reconstruction behavior. - * - * returns non-zero if and only if we have to stop working on the - * indicated disk due to a head-separation delay. - */ -static int -CheckHeadSeparation( - RF_Raid_t * raidPtr, - RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_HeadSepLimit_t hsCtr, - RF_ReconUnitNum_t which_ru) -{ - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_CallbackDesc_t *cb, *p, *pt; - int retval = 0; - - /* if we're too far ahead of the slowest disk, stop working on this - * disk until the slower ones catch up. We do this by scheduling a - * wakeup callback for the time when the slowest disk has caught up. - * We define "caught up" with 20% hysteresis, i.e. 
the head separation - * must have fallen to at most 80% of the max allowable head - * separation before we'll wake up. - * - */ - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - if ((raidPtr->headSepLimit >= 0) && - ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) > raidPtr->headSepLimit)) { - Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr %ld minHSCtr %ld limit %ld\n", - raidPtr->raidid, row, col, ctrl->headSepCounter, - reconCtrlPtr->minHeadSepCounter, - raidPtr->headSepLimit); - cb = rf_AllocCallbackDesc(); - /* the minHeadSepCounter value we have to get to before we'll - * wake up. build in 20% hysteresis. */ - cb->callbackArg.v = (ctrl->headSepCounter - raidPtr->headSepLimit + raidPtr->headSepLimit / 5); - cb->row = row; - cb->col = col; - cb->next = NULL; - - /* insert this callback descriptor into the sorted list of - * pending head-sep callbacks */ - p = reconCtrlPtr->headSepCBList; - if (!p) - reconCtrlPtr->headSepCBList = cb; - else - if (cb->callbackArg.v < p->callbackArg.v) { - cb->next = reconCtrlPtr->headSepCBList; - reconCtrlPtr->headSepCBList = cb; - } else { - for (pt = p, p = p->next; p && (p->callbackArg.v < cb->callbackArg.v); pt = p, p = p->next); - cb->next = p; - pt->next = cb; - } - retval = 1; -#if RF_RECON_STATS > 0 - ctrl->reconCtrl->reconDesc->hsStallCount++; -#endif /* RF_RECON_STATS > 0 */ - } - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - - return (retval); -} -/* - * checks to see if reconstruction has been either forced or blocked - * by a user operation. if forced, we skip this RU entirely. else if - * blocked, put ourselves on the wait list. else return 0. 
- * - * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY - */ -static int -CheckForcedOrBlockedReconstruction( - RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr, - RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_StripeNum_t psid, - RF_ReconUnitNum_t which_ru) -{ - RF_CallbackDesc_t *cb; - int retcode = 0; - - if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) || (pssPtr->flags & RF_PSS_FORCED_ON_WRITE)) - retcode = RF_PSS_FORCED_ON_WRITE; - else - if (pssPtr->flags & RF_PSS_RECON_BLOCKED) { - Dprintf4("RECON: row %d col %d blocked at psid %ld ru %d\n", row, col, psid, which_ru); - cb = rf_AllocCallbackDesc(); /* append ourselves to - * the blockage-wait - * list */ - cb->row = row; - cb->col = col; - cb->next = pssPtr->blockWaitList; - pssPtr->blockWaitList = cb; - retcode = RF_PSS_RECON_BLOCKED; - } - if (!retcode) - pssPtr->flags |= RF_PSS_UNDER_RECON; /* mark this RU as under - * reconstruction */ - - return (retcode); -} -/* - * if reconstruction is currently ongoing for the indicated stripeID, - * reconstruction is forced to completion and we return non-zero to - * indicate that the caller must wait. If not, then reconstruction is - * blocked on the indicated stripe and the routine returns zero. If - * and only if we return non-zero, we'll cause the cbFunc to get - * invoked with the cbArg when the reconstruction has completed. 
- */ -int -rf_ForceOrBlockRecon(raidPtr, asmap, cbFunc, cbArg) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - void (*cbFunc) (RF_Raid_t *, void *); - void *cbArg; -{ - RF_RowCol_t row = asmap->physInfo->row; /* which row of the array - * we're working on */ - RF_StripeNum_t stripeID = asmap->stripeID; /* the stripe ID we're - * forcing recon on */ - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; /* num sects in one RU */ - RF_ReconParityStripeStatus_t *pssPtr; /* a pointer to the parity - * stripe status structure */ - RF_StripeNum_t psid; /* parity stripe id */ - RF_SectorNum_t offset, fd_offset; /* disk offset, failed-disk - * offset */ - RF_RowCol_t *diskids; - RF_RowCol_t stripe; - RF_ReconUnitNum_t which_ru; /* RU within parity stripe */ - RF_RowCol_t fcol, diskno, i; - RF_ReconBuffer_t *new_rbuf; /* ptr to newly allocated rbufs */ - RF_DiskQueueData_t *req;/* disk I/O req to be enqueued */ - RF_CallbackDesc_t *cb; - int created = 0, nPromoted; - - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); - - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created); - - /* if recon is not ongoing on this PS, just return */ - if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (0); - } - /* otherwise, we have to wait for reconstruction to complete on this - * RU. */ - /* In order to avoid waiting for a potentially large number of - * low-priority accesses to complete, we force a normal-priority (i.e. - * not low-priority) reconstruction on this RU. 
*/ - if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) && !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) { - DDprintf1("Forcing recon on psid %ld\n", psid); - pssPtr->flags |= RF_PSS_FORCED_ON_WRITE; /* mark this RU as under - * forced recon */ - pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; /* clear the blockage - * that we just set */ - fcol = raidPtr->reconControl[row]->fcol; - - /* get a listing of the disks comprising the indicated stripe */ - (raidPtr->Layout.map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &stripe); - RF_ASSERT(row == stripe); - - /* For previously issued reads, elevate them to normal - * priority. If the I/O has already completed, it won't be - * found in the queue, and hence this will be a no-op. For - * unissued reads, allocate buffers and issue new reads. The - * fact that we've set the FORCED bit means that the regular - * recon procs will not re-issue these reqs */ - for (i = 0; i < raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; i++) - if ((diskno = diskids[i]) != fcol) { - if (pssPtr->issued[diskno]) { - nPromoted = rf_DiskIOPromote(&raidPtr->Queues[row][diskno], psid, which_ru); - if (rf_reconDebug && nPromoted) - printf("raid%d: promoted read from row %d col %d\n", raidPtr->raidid, row, diskno); - } else { - new_rbuf = rf_MakeReconBuffer(raidPtr, row, diskno, RF_RBUF_TYPE_FORCED); /* create new buf */ - ComputePSDiskOffsets(raidPtr, psid, row, diskno, &offset, &fd_offset, - &new_rbuf->spRow, &new_rbuf->spCol, &new_rbuf->spOffset); /* find offsets & spare - * location */ - new_rbuf->parityStripeID = psid; /* fill in the buffer */ - new_rbuf->which_ru = which_ru; - new_rbuf->failedDiskSectorOffset = fd_offset; - new_rbuf->priority = RF_IO_NORMAL_PRIORITY; - - /* use NULL b_proc b/c all addrs - * should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, offset + which_ru * sectorsPerRU, sectorsPerRU, new_rbuf->buffer, - psid, which_ru, (int (*) (void *, int)) ForceReconReadDoneProc, (void *) new_rbuf, NULL, 
- NULL, (void *) raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- - * XXX */ - - new_rbuf->arg = req; - rf_DiskIOEnqueue(&raidPtr->Queues[row][diskno], req, RF_IO_NORMAL_PRIORITY); /* enqueue the I/O */ - Dprintf3("raid%d: Issued new read req on row %d col %d\n", raidPtr->raidid, row, diskno); - } - } - /* if the write is sitting in the disk queue, elevate its - * priority */ - if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol], psid, which_ru)) - printf("raid%d: promoted write to row %d col %d\n", - raidPtr->raidid, row, fcol); - } - /* install a callback descriptor to be invoked when recon completes on - * this parity stripe. */ - cb = rf_AllocCallbackDesc(); - /* XXX the following is bogus.. These functions don't really match!! - * GO */ - cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc; - cb->callbackArg.p = (void *) cbArg; - cb->next = pssPtr->procWaitList; - pssPtr->procWaitList = cb; - DDprintf2("raid%d: Waiting for forced recon on psid %ld\n", - raidPtr->raidid, psid); - - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (1); -} -/* called upon the completion of a forced reconstruction read. - * all we do is schedule the FORCEDREADONE event. - * called at interrupt context in the kernel, so don't do anything illegal here. 
- */ -static void -ForceReconReadDoneProc(arg, status) - void *arg; - int status; -{ - RF_ReconBuffer_t *rbuf = arg; - - if (status) { - printf("Forced recon read failed!\n"); /* fprintf(stderr,"Forced - * recon read - * failed!\n"); */ - RF_PANIC(); - } - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREADDONE); -} -/* releases a block on the reconstruction of the indicated stripe */ -int -rf_UnblockRecon(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; -{ - RF_RowCol_t row = asmap->origRow; - RF_StripeNum_t stripeID = asmap->stripeID; - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psid; - int created = 0; - RF_CallbackDesc_t *cb; - - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_NONE, &created); - - /* When recon is forced, the pss desc can get deleted before we get - * back to unblock recon. But, this can _only_ happen when recon is - * forced. It would be good to put some kind of sanity check here, but - * how to decide if recon was just forced or not? */ - if (!pssPtr) { - /* printf("Warning: no pss descriptor upon unblock on psid %ld - * RU %d\n",psid,which_ru); */ - if (rf_reconDebug || rf_pssDebug) - printf("Warning: no pss descriptor upon unblock on psid %ld RU %d\n", (long) psid, which_ru); - goto out; - } - pssPtr->blockCount--; - Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d\n", - raidPtr->raidid, psid, pssPtr->blockCount); - if (pssPtr->blockCount == 0) { /* if recon blockage has been released */ - - /* unblock recon before calling CauseReconEvent in case - * CauseReconEvent causes us to try to issue a new read before - * returning here. 
*/ - pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; - - - while (pssPtr->blockWaitList) { - /* spin through the block-wait list and - release all the waiters */ - cb = pssPtr->blockWaitList; - pssPtr->blockWaitList = cb->next; - cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL, RF_REVENT_BLOCKCLEAR); - rf_FreeCallbackDesc(cb); - } - if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { - /* if no recon was requested while recon was blocked */ - rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); - } - } -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (0); -} diff --git a/sys/dev/raidframe/rf_reconstruct.h b/sys/dev/raidframe/rf_reconstruct.h deleted file mode 100644 index 318d546..0000000 --- a/sys/dev/raidframe/rf_reconstruct.h +++ /dev/null @@ -1,202 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconstruct.h,v 1.5 2000/05/28 00:48:30 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/********************************************************* - * rf_reconstruct.h -- header file for reconstruction code - *********************************************************/ - -#ifndef _RF__RF_RECONSTRUCT_H_ -#define _RF__RF_RECONSTRUCT_H_ - -#include <dev/raidframe/rf_types.h> -#include <sys/time.h> -#include <dev/raidframe/rf_reconmap.h> -#include <dev/raidframe/rf_psstatus.h> - -/* reconstruction configuration information */ -struct RF_ReconConfig_s { - unsigned numFloatingReconBufs; /* number of floating recon bufs to - * use */ - RF_HeadSepLimit_t headSepLimit; /* how far apart the heads are allow - * to become, in parity stripes */ -}; -/* a reconstruction buffer */ -struct RF_ReconBuffer_s { - RF_Raid_t *raidPtr; /* void * to avoid recursive includes */ - caddr_t buffer; /* points to the data */ - RF_StripeNum_t parityStripeID; /* the parity stripe that this data - * relates to */ - int which_ru; /* which reconstruction unit within the PSS */ - RF_SectorNum_t failedDiskSectorOffset; /* the offset into the failed - * disk */ - RF_RowCol_t row, col; /* which disk this buffer belongs to or is - * targeted at */ - RF_StripeCount_t count; /* counts the # of SUs installed so far */ - int priority; /* used to force hi priority recon */ - RF_RbufType_t type; /* FORCED or FLOATING */ - char *arrived; /* [x] = 1/0 if SU from disk x has/hasn't - * arrived */ - RF_ReconBuffer_t *next; /* used for buffer management */ - void *arg; /* generic field for general use */ - RF_RowCol_t spRow, spCol; /* spare disk to which this buf should - * be written */ - /* if dist sparing off, always identifies the replacement disk */ - RF_SectorNum_t spOffset;/* offset into the spare disk */ - /* if dist sparing off, identical to failedDiskSectorOffset */ - RF_ReconParityStripeStatus_t *pssPtr; /* debug- pss associated with - * issue-pending write */ -}; -/* a reconstruction event descriptor. 
The event types currently are: - * RF_REVENT_READDONE -- a read operation has completed - * RF_REVENT_WRITEDONE -- a write operation has completed - * RF_REVENT_BUFREADY -- the buffer manager has produced a full buffer - * RF_REVENT_BLOCKCLEAR -- a reconstruction blockage has been cleared - * RF_REVENT_BUFCLEAR -- the buffer manager has released a process blocked on submission - * RF_REVENT_SKIP -- we need to skip the current RU and go on to the next one, typ. b/c we found recon forced - * RF_REVENT_FORCEDREADONE- a forced-reconstructoin read operation has completed - */ -typedef enum RF_Revent_e { - RF_REVENT_READDONE, - RF_REVENT_WRITEDONE, - RF_REVENT_BUFREADY, - RF_REVENT_BLOCKCLEAR, - RF_REVENT_BUFCLEAR, - RF_REVENT_HEADSEPCLEAR, - RF_REVENT_SKIP, - RF_REVENT_FORCEDREADDONE -} RF_Revent_t; - -struct RF_ReconEvent_s { - RF_Revent_t type; /* what kind of event has occurred */ - RF_RowCol_t col; /* row ID is implicit in the queue in which - * the event is placed */ - void *arg; /* a generic argument */ - RF_ReconEvent_t *next; -}; -/* - * Reconstruction control information maintained per-disk - * (for surviving disks) - */ -struct RF_PerDiskReconCtrl_s { - RF_ReconCtrl_t *reconCtrl; - RF_RowCol_t row, col; /* to make this structure self-identifying */ - RF_StripeNum_t curPSID; /* the next parity stripe ID to check on this - * disk */ - RF_HeadSepLimit_t headSepCounter; /* counter used to control - * maximum head separation */ - RF_SectorNum_t diskOffset; /* the offset into the indicated disk - * of the current PU */ - RF_ReconUnitNum_t ru_count; /* this counts off the recon units - * within each parity unit */ - RF_ReconBuffer_t *rbuf; /* the recon buffer assigned to this disk */ -}; -/* main reconstruction control structure */ -struct RF_ReconCtrl_s { - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t fcol; /* which column has failed */ - RF_PerDiskReconCtrl_t *perDiskInfo; /* information maintained - * per-disk */ - RF_ReconMap_t *reconMap;/* map of what has/has 
not been reconstructed */ - RF_RowCol_t spareRow; /* which of the spare disks we're using */ - RF_RowCol_t spareCol; - RF_StripeNum_t lastPSID;/* the ID of the last parity stripe we want - * reconstructed */ - int percentComplete;/* percentage completion of reconstruction */ - int numRUsComplete; /* number of Reconstruction Units done */ - int numRUsTotal; /* total number of Reconstruction Units */ - - /* reconstruction event queue */ - RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction - * events */ - RF_DECLARE_MUTEX(eq_mutex) /* mutex for locking event - * queue */ - RF_DECLARE_COND(eq_cond) /* condition variable for - * signalling recon events */ - int eq_count; /* debug only */ - - /* reconstruction buffer management */ - RF_DECLARE_MUTEX(rb_mutex) /* mutex for messing around - * with recon buffers */ - RF_ReconBuffer_t *floatingRbufs; /* available floating - * reconstruction buffers */ - RF_ReconBuffer_t *committedRbufs; /* recon buffers that have - * been committed to some - * waiting disk */ - RF_ReconBuffer_t *fullBufferList; /* full buffers waiting to be - * written out */ - RF_ReconBuffer_t *priorityList; /* full buffers that have been - * elevated to higher priority */ - RF_CallbackDesc_t *bufferWaitList; /* disks that are currently - * blocked waiting for buffers */ - - /* parity stripe status table */ - RF_PSStatusHeader_t *pssTable; /* stores the reconstruction status of - * active parity stripes */ - - /* maximum-head separation control */ - RF_HeadSepLimit_t minHeadSepCounter; /* the minimum hs counter over - * all disks */ - RF_CallbackDesc_t *headSepCBList; /* list of callbacks to be - * done as minPSID advances */ - - /* performance monitoring */ - struct timeval starttime; /* recon start time */ - - void (*continueFunc) (void *); /* function to call when io - * returns */ - void *continueArg; /* argument for Func */ -}; -/* the default priority for reconstruction accesses */ -#define RF_IO_RECON_PRIORITY RF_IO_LOW_PRIORITY - -int 
rf_ConfigureReconstruction(RF_ShutdownList_t ** listp); - -int -rf_ReconstructFailedDisk(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); - -int -rf_ReconstructFailedDiskBasic(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); - -int -rf_ReconstructInPlace(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col); - -int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t * reconDesc); - -int -rf_ForceOrBlockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - void (*cbFunc) (RF_Raid_t *, void *), void *cbArg); - - int rf_UnblockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); - - int rf_RegisterReconDoneProc(RF_Raid_t * raidPtr, void (*proc) (RF_Raid_t *, void *), void *arg, - RF_ReconDoneProc_t ** handlep); - -#endif /* !_RF__RF_RECONSTRUCT_H_ */ diff --git a/sys/dev/raidframe/rf_reconutil.c b/sys/dev/raidframe/rf_reconutil.c deleted file mode 100644 index bafff69..0000000 --- a/sys/dev/raidframe/rf_reconutil.c +++ /dev/null @@ -1,338 +0,0 @@ -/* $NetBSD: rf_reconutil.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************** - * rf_reconutil.c -- reconstruction utilities - ********************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_reconutil.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_raid5_rotatedspare.h> -#include <dev/raidframe/rf_interdecluster.h> -#include <dev/raidframe/rf_chaindecluster.h> - -/******************************************************************* - * allocates/frees the reconstruction control information structures - *******************************************************************/ -RF_ReconCtrl_t * -rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t frow; /* failed row and column */ - RF_RowCol_t fcol; - RF_RowCol_t srow; /* identifies which spare we're using */ - RF_RowCol_t scol; -{ - RF_Raid_t *raidPtr = reconDesc->raidPtr; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; - RF_ReconUnitCount_t numSpareRUs; - RF_ReconCtrl_t *reconCtrlPtr; - RF_ReconBuffer_t *rbuf; - RF_LayoutSW_t *lp; - int retcode, rc; - RF_RowCol_t i; - - lp = raidPtr->Layout.map; - - /* make and zero the global reconstruction structure and the per-disk - * structure */ - RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *)); - RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), 
(RF_PerDiskReconCtrl_t *)); /* this zeros it */ - reconCtrlPtr->reconDesc = reconDesc; - reconCtrlPtr->fcol = fcol; - reconCtrlPtr->spareRow = srow; - reconCtrlPtr->spareCol = scol; - reconCtrlPtr->lastPSID = layoutPtr->numStripe / layoutPtr->SUsPerPU; - reconCtrlPtr->percentComplete = 0; - - /* initialize each per-disk recon information structure */ - for (i = 0; i < raidPtr->numCol; i++) { - reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr; - reconCtrlPtr->perDiskInfo[i].row = frow; - reconCtrlPtr->perDiskInfo[i].col = i; - reconCtrlPtr->perDiskInfo[i].curPSID = -1; /* make it appear as if - * we just finished an - * RU */ - reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU - 1; - } - - /* Get the number of spare units per disk and the sparemap in case - * spare is distributed */ - - if (lp->GetNumSpareRUs) { - numSpareRUs = lp->GetNumSpareRUs(raidPtr); - } else { - numSpareRUs = 0; - } - - /* - * Not all distributed sparing archs need dynamic mappings - */ - if (lp->InstallSpareTable) { - retcode = rf_InstallSpareTable(raidPtr, frow, fcol); - if (retcode) { - RF_PANIC(); /* XXX fix this */ - } - } - /* make the reconstruction map */ - reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit), - raidPtr->sectorsPerDisk, numSpareRUs); - - /* make the per-disk reconstruction buffers */ - for (i = 0; i < raidPtr->numCol; i++) { - reconCtrlPtr->perDiskInfo[i].rbuf = (i == fcol) ? 
NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE); - } - - /* initialize the event queue */ - rc = rf_mutex_init(&reconCtrlPtr->eq_mutex, __FUNCTION__); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (NULL); - } - rc = rf_cond_init(&reconCtrlPtr->eq_cond); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (NULL); - } - reconCtrlPtr->eventQueue = NULL; - reconCtrlPtr->eq_count = 0; - - /* make the floating recon buffers and append them to the free list */ - rc = rf_mutex_init(&reconCtrlPtr->rb_mutex, __FUNCTION__); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (NULL); - } - reconCtrlPtr->fullBufferList = NULL; - reconCtrlPtr->priorityList = NULL; - reconCtrlPtr->floatingRbufs = NULL; - reconCtrlPtr->committedRbufs = NULL; - for (i = 0; i < raidPtr->numFloatingReconBufs; i++) { - rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING); - rbuf->next = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = rbuf; - } - - /* create the parity stripe status table */ - reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr); - - /* set the initial min head sep counter val */ - reconCtrlPtr->minHeadSepCounter = 0; - - return (reconCtrlPtr); -} - -void -rf_FreeReconControl(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; -{ - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_ReconBuffer_t *t; - RF_ReconUnitNum_t i; - - RF_ASSERT(reconCtrlPtr); - for (i = 0; i < raidPtr->numCol; i++) - if (reconCtrlPtr->perDiskInfo[i].rbuf) - rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf); - for (i = 0; i < raidPtr->numFloatingReconBufs; i++) { - t = reconCtrlPtr->floatingRbufs; - RF_ASSERT(t); - reconCtrlPtr->floatingRbufs = t->next; - 
rf_FreeReconBuffer(t); - } - rf_mutex_destroy(&reconCtrlPtr->rb_mutex); - rf_mutex_destroy(&reconCtrlPtr->eq_mutex); - rf_cond_destroy(&reconCtrlPtr->eq_cond); - rf_FreeReconMap(reconCtrlPtr->reconMap); - rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable); - RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t)); - RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr)); -} - - -/****************************************************************************** - * computes the default head separation limit - *****************************************************************************/ -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimit(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_HeadSepLimit_t hsl; - RF_LayoutSW_t *lp; - - lp = raidPtr->Layout.map; - if (lp->GetDefaultHeadSepLimit == NULL) - return (-1); - hsl = lp->GetDefaultHeadSepLimit(raidPtr); - return (hsl); -} - - -/****************************************************************************** - * computes the default number of floating recon buffers - *****************************************************************************/ -int -rf_GetDefaultNumFloatingReconBuffers(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_LayoutSW_t *lp; - int nrb; - - lp = raidPtr->Layout.map; - if (lp->GetDefaultNumFloatingReconBuffers == NULL) - return (3 * raidPtr->numCol); - nrb = lp->GetDefaultNumFloatingReconBuffers(raidPtr); - return (nrb); -} - - -/****************************************************************************** - * creates and initializes a reconstruction buffer - *****************************************************************************/ -RF_ReconBuffer_t * -rf_MakeReconBuffer( - RF_Raid_t * raidPtr, - RF_RowCol_t row, - RF_RowCol_t col, - RF_RbufType_t type) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconBuffer_t *t; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit); - - RF_Malloc(t, sizeof(RF_ReconBuffer_t), 
(RF_ReconBuffer_t *)); - RF_Malloc(t->buffer, recon_buffer_size, (caddr_t)); - RF_Malloc(t->arrived, raidPtr->numCol * sizeof(char), (char *)); - t->raidPtr = raidPtr; - t->row = row; - t->col = col; - t->priority = RF_IO_RECON_PRIORITY; - t->type = type; - t->pssPtr = NULL; - t->next = NULL; - return (t); -} -/****************************************************************************** - * frees a reconstruction buffer - *****************************************************************************/ -void -rf_FreeReconBuffer(rbuf) - RF_ReconBuffer_t *rbuf; -{ - RF_Raid_t *raidPtr = rbuf->raidPtr; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit); - - RF_Free(rbuf->arrived, raidPtr->numCol * sizeof(char)); - RF_Free(rbuf->buffer, recon_buffer_size); - RF_Free(rbuf, sizeof(*rbuf)); -} - - -/****************************************************************************** - * debug only: sanity check the number of floating recon bufs in use - *****************************************************************************/ -void -rf_CheckFloatingRbufCount(raidPtr, dolock) - RF_Raid_t *raidPtr; - int dolock; -{ - RF_ReconParityStripeStatus_t *p; - RF_PSStatusHeader_t *pssTable; - RF_ReconBuffer_t *rbuf; - int i, j, sum = 0; - RF_RowCol_t frow = 0; - - for (i = 0; i < raidPtr->numRow; i++) - if (raidPtr->reconControl[i]) { - frow = i; - break; - } - RF_ASSERT(frow >= 0); - - if (dolock) - RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - pssTable = raidPtr->reconControl[frow]->pssTable; - - for (i = 0; i < raidPtr->pssTableSize; i++) { - RF_LOCK_MUTEX(pssTable[i].mutex); - for (p = pssTable[i].chain; p; p = p->next) { - rbuf = (RF_ReconBuffer_t *) p->rbuf; - if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - - rbuf = (RF_ReconBuffer_t *) p->writeRbuf; - if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - - for (j = 0; j < p->xorBufCount; j++) { - rbuf = (RF_ReconBuffer_t *) 
p->rbufsForXor[j]; - RF_ASSERT(rbuf); - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - } - RF_UNLOCK_MUTEX(pssTable[i].mutex); - } - - for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - - RF_ASSERT(sum == raidPtr->numFloatingReconBufs); - - if (dolock) - RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); -} diff --git a/sys/dev/raidframe/rf_reconutil.h b/sys/dev/raidframe/rf_reconutil.h deleted file mode 100644 index 744d7b9..0000000 --- a/sys/dev/raidframe/rf_reconutil.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconutil.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************ - * rf_reconutil.h -- header file for reconstruction utilities - ************************************************************/ - -#ifndef _RF__RF_RECONUTIL_H_ -#define _RF__RF_RECONUTIL_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_reconstruct.h> - -RF_ReconCtrl_t * -rf_MakeReconControl(RF_RaidReconDesc_t * reconDesc, - RF_RowCol_t frow, RF_RowCol_t fcol, RF_RowCol_t srow, RF_RowCol_t scol); -void rf_FreeReconControl(RF_Raid_t * raidPtr, RF_RowCol_t row); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(RF_Raid_t * raidPtr); -int rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t * raidPtr); -RF_ReconBuffer_t * -rf_MakeReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col, RF_RbufType_t type); -void rf_FreeReconBuffer(RF_ReconBuffer_t * rbuf); -void rf_CheckFloatingRbufCount(RF_Raid_t * raidPtr, int dolock); - -#endif /* !_RF__RF_RECONUTIL_H_ */ diff --git a/sys/dev/raidframe/rf_revent.c b/sys/dev/raidframe/rf_revent.c deleted file mode 100644 index fcdf82e..0000000 --- a/sys/dev/raidframe/rf_revent.c +++ /dev/null @@ -1,230 +0,0 @@ -/* $NetBSD: rf_revent.c,v 1.9 2000/09/21 01:45:46 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * revent.c -- reconstruction event handling code - */ - -#include <sys/errno.h> - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -static RF_FreeList_t *rf_revent_freelist; -#define RF_MAX_FREE_REVENT 128 -#define RF_REVENT_INC 8 -#define RF_REVENT_INITIAL 8 - - - -#include <sys/proc.h> -#include <sys/kernel.h> - -#define DO_WAIT(_rc) \ - RF_LTSLEEP(&(_rc)->eventQueue, PRIBIO, "raidframe eventq", \ - 0, &((_rc)->eq_mutex)) - -#define DO_SIGNAL(_rc) wakeup(&(_rc)->eventQueue) - - -static void rf_ShutdownReconEvent(void *); - -static RF_ReconEvent_t * -GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); - -static void rf_ShutdownReconEvent(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_revent_freelist, next, (RF_ReconEvent_t *)); -} - -int 
-rf_ConfigureReconEvent(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_revent_freelist, RF_MAX_FREE_REVENT, - RF_REVENT_INC, sizeof(RF_ReconEvent_t)); - if (rf_revent_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownReconEvent, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownReconEvent(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_revent_freelist, RF_REVENT_INITIAL, next, - (RF_ReconEvent_t *)); - return (0); -} - -/* returns the next reconstruction event, blocking the calling thread - * until one becomes available. will now return null if it is blocked - * or will return an event if it is not */ - -RF_ReconEvent_t * -rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t row; - void (*continueFunc) (void *); - void *continueArg; -{ - RF_Raid_t *raidPtr = reconDesc->raidPtr; - RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; - RF_ReconEvent_t *event; - - RF_ASSERT(row >= 0 && row <= raidPtr->numRow); - RF_LOCK_MUTEX(rctrl->eq_mutex); - /* q null and count==0 must be equivalent conditions */ - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); - - rctrl->continueFunc = continueFunc; - rctrl->continueArg = continueArg; - - - /* mpsleep timeout value: secs = timo_val/hz. 'ticks' here is - defined as cycle-counter ticks, not softclock ticks */ - -#define MAX_RECON_EXEC_USECS (100 * 1000) /* 100 ms */ -#define RECON_DELAY_MS 25 -#define RECON_TIMO ((RECON_DELAY_MS * hz) / 1000) - - /* we are not pre-emptible in the kernel, but we don't want to run - * forever. If we run w/o blocking for more than MAX_RECON_EXEC_TICKS - * ticks of the cycle counter, delay for RECON_DELAY before - * continuing. this may murder us with context switches, so we may - * need to increase both the MAX...TICKS and the RECON_DELAY_MS. 
*/ - if (reconDesc->reconExecTimerRunning) { - int status; - - RF_ETIMER_STOP(reconDesc->recon_exec_timer); - RF_ETIMER_EVAL(reconDesc->recon_exec_timer); - reconDesc->reconExecTicks += - RF_ETIMER_VAL_US(reconDesc->recon_exec_timer); - if (reconDesc->reconExecTicks > reconDesc->maxReconExecTicks) - reconDesc->maxReconExecTicks = - reconDesc->reconExecTicks; - if (reconDesc->reconExecTicks >= MAX_RECON_EXEC_USECS) { - /* we've been running too long. delay for - * RECON_DELAY_MS */ -#if RF_RECON_STATS > 0 - reconDesc->numReconExecDelays++; -#endif /* RF_RECON_STATS > 0 */ - - status = RF_LTSLEEP(&reconDesc->reconExecTicks, PRIBIO, - "recon delay", RECON_TIMO, - &rctrl->eq_mutex); - RF_ASSERT(status == EWOULDBLOCK); - reconDesc->reconExecTicks = 0; - } - } - while (!rctrl->eventQueue) { -#if RF_RECON_STATS > 0 - reconDesc->numReconEventWaits++; -#endif /* RF_RECON_STATS > 0 */ - DO_WAIT(rctrl); - reconDesc->reconExecTicks = 0; /* we've just waited */ - } - - reconDesc->reconExecTimerRunning = 1; - if (RF_ETIMER_VAL_US(reconDesc->recon_exec_timer)!=0) { - /* it moved!! reset the timer. 
*/ - RF_ETIMER_START(reconDesc->recon_exec_timer); - } - event = rctrl->eventQueue; - rctrl->eventQueue = event->next; - event->next = NULL; - rctrl->eq_count--; - - /* q null and count==0 must be equivalent conditions */ - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); - RF_UNLOCK_MUTEX(rctrl->eq_mutex); - return (event); -} -/* enqueues a reconstruction event on the indicated queue */ -void -rf_CauseReconEvent(raidPtr, row, col, arg, type) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; -{ - RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; - RF_ReconEvent_t *event = GetReconEventDesc(row, col, arg, type); - - if (type == RF_REVENT_BUFCLEAR) { - RF_ASSERT(col != rctrl->fcol); - } - RF_ASSERT(row >= 0 && row <= raidPtr->numRow && col >= 0 && col <= raidPtr->numCol); - RF_LOCK_MUTEX(rctrl->eq_mutex); - /* q null and count==0 must be equivalent conditions */ - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); - event->next = rctrl->eventQueue; - rctrl->eventQueue = event; - rctrl->eq_count++; - RF_UNLOCK_MUTEX(rctrl->eq_mutex); - - DO_SIGNAL(rctrl); -} -/* allocates and initializes a recon event descriptor */ -static RF_ReconEvent_t * -GetReconEventDesc(row, col, arg, type) - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; -{ - RF_ReconEvent_t *t; - - RF_FREELIST_GET(rf_revent_freelist, t, next, (RF_ReconEvent_t *)); - if (t == NULL) - return (NULL); - t->col = col; - t->arg = arg; - t->type = type; - return (t); -} - -void -rf_FreeReconEventDesc(event) - RF_ReconEvent_t *event; -{ - RF_FREELIST_FREE(rf_revent_freelist, event, next); -} diff --git a/sys/dev/raidframe/rf_revent.h b/sys/dev/raidframe/rf_revent.h deleted file mode 100644 index 51c3202..0000000 --- a/sys/dev/raidframe/rf_revent.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_revent.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. 
- * All rights reserved. - * - * Author: - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************* - * - * rf_revent.h -- header file for reconstruction event handling code - * - *******************************************************************/ - -#ifndef _RF__RF_REVENT_H_ -#define _RF__RF_REVENT_H_ - -#include <dev/raidframe/rf_types.h> - -int rf_ConfigureReconEvent(RF_ShutdownList_t ** listp); - -RF_ReconEvent_t * -rf_GetNextReconEvent(RF_RaidReconDesc_t * reconDesc, - RF_RowCol_t row, void (*continueFunc) (void *), void *continueArg); - - void rf_CauseReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); - - void rf_FreeReconEventDesc(RF_ReconEvent_t * event); - -#endif /* !_RF__RF_REVENT_H_ */ diff --git a/sys/dev/raidframe/rf_shutdown.c b/sys/dev/raidframe/rf_shutdown.c deleted file mode 100644 index e6b5292..0000000 --- a/sys/dev/raidframe/rf_shutdown.c +++ /dev/null @@ -1,104 +0,0 @@ -/* $NetBSD: rf_shutdown.c,v 1.6 2000/01/13 23:41:18 oster Exp $ */ - -#include <sys/cdefs.h> 
-__FBSDID("$FreeBSD$"); -/* - * rf_shutdown.c - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * Maintain lists of cleanup functions. Also, mechanisms for coordinating - * thread startup and shutdown. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_freelist.h> - -static void -rf_FreeShutdownEnt(RF_ShutdownList_t * ent) -{ - FREE(ent, M_RAIDFRAME); -} - -int -_rf_ShutdownCreate( - RF_ShutdownList_t ** listp, - void (*cleanup) (void *arg), - void *arg, - char *file, - int line) -{ - RF_ShutdownList_t *ent; - - /* - * Have to directly allocate memory here, since we start up before - * and shutdown after RAIDframe internal allocation system. 
- */ - /* ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), - M_RAIDFRAME, M_WAITOK); */ - ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), - M_RAIDFRAME, M_NOWAIT); - if (ent == NULL) - return (ENOMEM); - ent->cleanup = cleanup; - ent->arg = arg; - ent->file = file; - ent->line = line; - ent->next = *listp; - *listp = ent; - return (0); -} - -int -rf_ShutdownList(RF_ShutdownList_t ** list) -{ - RF_ShutdownList_t *r, *next; - char *file; - int line; - - for (r = *list; r; r = next) { - next = r->next; - file = r->file; - line = r->line; - - if (rf_shutdownDebug) { - printf("call shutdown, created %s:%d\n", file, line); - } - r->cleanup(r->arg); - - if (rf_shutdownDebug) { - printf("completed shutdown, created %s:%d\n", file, line); - } - rf_FreeShutdownEnt(r); - } - *list = NULL; - return (0); -} diff --git a/sys/dev/raidframe/rf_shutdown.h b/sys/dev/raidframe/rf_shutdown.h deleted file mode 100644 index 5abc5ba..0000000 --- a/sys/dev/raidframe/rf_shutdown.h +++ /dev/null @@ -1,67 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_shutdown.h,v 1.2 1999/02/05 00:06:17 oster Exp $ */ -/* - * rf_shutdown.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * Maintain lists of cleanup functions. Also, mechanisms for coordinating - * thread startup and shutdown. - */ - -#ifndef _RF__RF_SHUTDOWN_H_ -#define _RF__RF_SHUTDOWN_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -/* - * Important note: the shutdown list is run like a stack, new - * entries pushed on top. Therefore, the most recently added - * entry (last started) is the first removed (stopped). This - * should handle system-dependencies pretty nicely- if a system - * is there when you start another, it'll be there when you - * shut down another. Hopefully, this subsystem will remove - * more complexity than it introduces. - */ - -struct RF_ShutdownList_s { - void (*cleanup) (void *arg); - void *arg; - char *file; - int line; - RF_ShutdownList_t *next; -}; -#define rf_ShutdownCreate(_listp_,_func_,_arg_) \ - _rf_ShutdownCreate(_listp_,_func_,_arg_,__FILE__,__LINE__) - -int _rf_ShutdownCreate(RF_ShutdownList_t ** listp, void (*cleanup) (void *arg), - void *arg, char *file, int line); -int rf_ShutdownList(RF_ShutdownList_t ** listp); - -#endif /* !_RF__RF_SHUTDOWN_H_ */ diff --git a/sys/dev/raidframe/rf_sstf.c b/sys/dev/raidframe/rf_sstf.c deleted file mode 100644 index cd9ea56..0000000 --- a/sys/dev/raidframe/rf_sstf.c +++ /dev/null @@ -1,658 +0,0 @@ -/* $NetBSD: rf_sstf.c,v 1.6 2001/01/27 20:18:55 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************************* - * - * sstf.c -- prioritized shortest seek time first disk queueing code - * - ******************************************************************************/ - -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_sstf.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_types.h> - -#define DIR_LEFT 1 -#define DIR_RIGHT 2 -#define DIR_EITHER 3 - -#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_))) - -#define QSUM(_sstfq_) (((_sstfq_)->lopri.qlen)+((_sstfq_)->left.qlen)+((_sstfq_)->right.qlen)) - - -static void -do_sstf_ord_q(RF_DiskQueueData_t **, - RF_DiskQueueData_t **, - RF_DiskQueueData_t *); - -static RF_DiskQueueData_t * -closest_to_arm(RF_SstfQ_t *, - 
RF_SectorNum_t, - int *, - int); -static void do_dequeue(RF_SstfQ_t *, RF_DiskQueueData_t *); - - -static void -do_sstf_ord_q(queuep, tailp, req) - RF_DiskQueueData_t **queuep; - RF_DiskQueueData_t **tailp; - RF_DiskQueueData_t *req; -{ - RF_DiskQueueData_t *r, *s; - - if (*queuep == NULL) { - *queuep = req; - *tailp = req; - req->next = NULL; - req->prev = NULL; - return; - } - if (req->sectorOffset <= (*queuep)->sectorOffset) { - req->next = *queuep; - req->prev = NULL; - (*queuep)->prev = req; - *queuep = req; - return; - } - if (req->sectorOffset > (*tailp)->sectorOffset) { - /* optimization */ - r = NULL; - s = *tailp; - goto q_at_end; - } - for (s = NULL, r = *queuep; r; s = r, r = r->next) { - if (r->sectorOffset >= req->sectorOffset) { - /* insert after s, before r */ - RF_ASSERT(s); - req->next = r; - r->prev = req; - s->next = req; - req->prev = s; - return; - } - } -q_at_end: - /* insert after s, at end of queue */ - RF_ASSERT(r == NULL); - RF_ASSERT(s); - RF_ASSERT(s == (*tailp)); - req->next = NULL; - req->prev = s; - s->next = req; - *tailp = req; -} -/* for removing from head-of-queue */ -#define DO_HEAD_DEQ(_r_,_q_) { \ - _r_ = (_q_)->queue; \ - RF_ASSERT((_r_) != NULL); \ - (_q_)->queue = (_r_)->next; \ - (_q_)->qlen--; \ - if ((_q_)->qlen == 0) { \ - RF_ASSERT((_r_) == (_q_)->qtail); \ - RF_ASSERT((_q_)->queue == NULL); \ - (_q_)->qtail = NULL; \ - } \ - else { \ - RF_ASSERT((_q_)->queue->prev == (_r_)); \ - (_q_)->queue->prev = NULL; \ - } \ -} - -/* for removing from end-of-queue */ -#define DO_TAIL_DEQ(_r_,_q_) { \ - _r_ = (_q_)->qtail; \ - RF_ASSERT((_r_) != NULL); \ - (_q_)->qtail = (_r_)->prev; \ - (_q_)->qlen--; \ - if ((_q_)->qlen == 0) { \ - RF_ASSERT((_r_) == (_q_)->queue); \ - RF_ASSERT((_q_)->qtail == NULL); \ - (_q_)->queue = NULL; \ - } \ - else { \ - RF_ASSERT((_q_)->qtail->next == (_r_)); \ - (_q_)->qtail->next = NULL; \ - } \ -} - -#define DO_BEST_DEQ(_l_,_r_,_q_) { \ - if (SNUM_DIFF((_q_)->queue->sectorOffset,_l_) \ - < 
SNUM_DIFF((_q_)->qtail->sectorOffset,_l_)) \ - { \ - DO_HEAD_DEQ(_r_,_q_); \ - } \ - else { \ - DO_TAIL_DEQ(_r_,_q_); \ - } \ -} - -static RF_DiskQueueData_t * -closest_to_arm(queue, arm_pos, dir, allow_reverse) - RF_SstfQ_t *queue; - RF_SectorNum_t arm_pos; - int *dir; - int allow_reverse; -{ - RF_SectorNum_t best_pos_l = 0, this_pos_l = 0, last_pos = 0; - RF_SectorNum_t best_pos_r = 0, this_pos_r = 0; - RF_DiskQueueData_t *r, *best_l, *best_r; - - best_r = best_l = NULL; - for (r = queue->queue; r; r = r->next) { - if (r->sectorOffset < arm_pos) { - if (best_l == NULL) { - best_l = r; - last_pos = best_pos_l = this_pos_l; - } else { - this_pos_l = arm_pos - r->sectorOffset; - if (this_pos_l < best_pos_l) { - best_l = r; - last_pos = best_pos_l = this_pos_l; - } else { - last_pos = this_pos_l; - } - } - } else { - if (best_r == NULL) { - best_r = r; - last_pos = best_pos_r = this_pos_r; - } else { - this_pos_r = r->sectorOffset - arm_pos; - if (this_pos_r < best_pos_r) { - best_r = r; - last_pos = best_pos_r = this_pos_r; - } else { - last_pos = this_pos_r; - } - if (this_pos_r > last_pos) { - /* getting farther away */ - break; - } - } - } - } - if ((best_r == NULL) && (best_l == NULL)) - return (NULL); - if ((*dir == DIR_RIGHT) && best_r) - return (best_r); - if ((*dir == DIR_LEFT) && best_l) - return (best_l); - if (*dir == DIR_EITHER) { - if (best_l == NULL) - return (best_r); - if (best_r == NULL) - return (best_l); - if (best_pos_r < best_pos_l) - return (best_r); - else - return (best_l); - } - /* - * Nothing in the direction we want to go. Reverse or - * reset the arm. We know we have an I/O in the other - * direction. - */ - if (allow_reverse) { - if (*dir == DIR_RIGHT) { - *dir = DIR_LEFT; - return (best_l); - } else { - *dir = DIR_RIGHT; - return (best_r); - } - } - /* - * Reset (beginning of queue). 
- */ - RF_ASSERT(*dir == DIR_RIGHT); - return (queue->queue); -} - -void * -rf_SstfCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; -{ - RF_Sstf_t *sstfq; - - RF_CallocAndAdd(sstfq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); - sstfq->dir = DIR_EITHER; - sstfq->allow_reverse = 1; - return ((void *) sstfq); -} - -void * -rf_ScanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; -{ - RF_Sstf_t *scanq; - - RF_CallocAndAdd(scanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); - scanq->dir = DIR_RIGHT; - scanq->allow_reverse = 1; - return ((void *) scanq); -} - -void * -rf_CscanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; -{ - RF_Sstf_t *cscanq; - - RF_CallocAndAdd(cscanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); - cscanq->dir = DIR_RIGHT; - return ((void *) cscanq); -} - -void -rf_SstfEnqueue(qptr, req, priority) - void *qptr; - RF_DiskQueueData_t *req; - int priority; -{ - RF_Sstf_t *sstfq; - - sstfq = (RF_Sstf_t *) qptr; - - if (priority == RF_IO_LOW_PRIORITY) { - if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - printf("raid%d: ENQ lopri %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, - dq->row, dq->col, - sstfq->left.qlen, sstfq->right.qlen, - sstfq->lopri.qlen); - } - do_sstf_ord_q(&sstfq->lopri.queue, &sstfq->lopri.qtail, req); - sstfq->lopri.qlen++; - } else { - if (req->sectorOffset < sstfq->last_sector) { - do_sstf_ord_q(&sstfq->left.queue, &sstfq->left.qtail, req); - sstfq->left.qlen++; - } else { - do_sstf_ord_q(&sstfq->right.queue, &sstfq->right.qtail, req); - sstfq->right.qlen++; - } - } -} - -static void -do_dequeue(queue, req) - RF_SstfQ_t *queue; - RF_DiskQueueData_t *req; -{ - RF_DiskQueueData_t *req2; - - if (rf_sstfDebug || 
rf_scanDebug || rf_cscanDebug) { - printf("raid%d: do_dequeue\n", req->raidPtr->raidid); - } - if (req == queue->queue) { - DO_HEAD_DEQ(req2, queue); - RF_ASSERT(req2 == req); - } else - if (req == queue->qtail) { - DO_TAIL_DEQ(req2, queue); - RF_ASSERT(req2 == req); - } else { - /* dequeue from middle of list */ - RF_ASSERT(req->next); - RF_ASSERT(req->prev); - queue->qlen--; - req->next->prev = req->prev; - req->prev->next = req->next; - req->next = req->prev = NULL; - } -} - -RF_DiskQueueData_t * -rf_SstfDequeue(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req = NULL; - RF_Sstf_t *sstfq; - - sstfq = (RF_Sstf_t *) qptr; - - if (rf_sstfDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - RF_ASSERT(QSUM(sstfq) == dq->queueLength); - printf("raid%d: sstf: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, - sstfq->left.qlen, sstfq->right.qlen, sstfq->lopri.qlen); - } - if (sstfq->left.queue == NULL) { - RF_ASSERT(sstfq->left.qlen == 0); - if (sstfq->right.queue == NULL) { - RF_ASSERT(sstfq->right.qlen == 0); - if (sstfq->lopri.queue == NULL) { - RF_ASSERT(sstfq->lopri.qlen == 0); - return (NULL); - } - if (rf_sstfDebug) { - printf("raid%d: sstf: check for close lopri", - req->raidPtr->raidid); - } - req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, - &sstfq->dir, sstfq->allow_reverse); - if (rf_sstfDebug) { - printf("raid%d: sstf: closest_to_arm said %lx", - req->raidPtr->raidid, (long) req); - } - if (req == NULL) - return (NULL); - do_dequeue(&sstfq->lopri, req); - } else { - DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->right); - } - } else { - if (sstfq->right.queue == NULL) { - RF_ASSERT(sstfq->right.qlen == 0); - DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->left); - } else { - if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) - < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { - DO_HEAD_DEQ(req, &sstfq->right); - } else { - DO_TAIL_DEQ(req, &sstfq->left); - } - } - } - 
RF_ASSERT(req); - sstfq->last_sector = req->sectorOffset; - return (req); -} - -RF_DiskQueueData_t * -rf_ScanDequeue(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req = NULL; - RF_Sstf_t *scanq; - - scanq = (RF_Sstf_t *) qptr; - - if (rf_scanDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - RF_ASSERT(QSUM(scanq) == dq->queueLength); - printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, - scanq->left.qlen, scanq->right.qlen, scanq->lopri.qlen); - } - if (scanq->left.queue == NULL) { - RF_ASSERT(scanq->left.qlen == 0); - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - if (scanq->lopri.queue == NULL) { - RF_ASSERT(scanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &scanq->dir, scanq->allow_reverse); - if (req == NULL) - return (NULL); - do_dequeue(&scanq->lopri, req); - } else { - scanq->dir = DIR_RIGHT; - DO_HEAD_DEQ(req, &scanq->right); - } - } else - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - RF_ASSERT(scanq->left.queue); - scanq->dir = DIR_LEFT; - DO_TAIL_DEQ(req, &scanq->left); - } else { - RF_ASSERT(scanq->right.queue); - RF_ASSERT(scanq->left.queue); - if (scanq->dir == DIR_RIGHT) { - DO_HEAD_DEQ(req, &scanq->right); - } else { - DO_TAIL_DEQ(req, &scanq->left); - } - } - RF_ASSERT(req); - scanq->last_sector = req->sectorOffset; - return (req); -} - -RF_DiskQueueData_t * -rf_CscanDequeue(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req = NULL; - RF_Sstf_t *cscanq; - - cscanq = (RF_Sstf_t *) qptr; - - RF_ASSERT(cscanq->dir == DIR_RIGHT); - if (rf_cscanDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - RF_ASSERT(QSUM(cscanq) == dq->queueLength); - printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, - cscanq->left.qlen, cscanq->right.qlen, - cscanq->lopri.qlen); - } - if (cscanq->right.queue) { - DO_HEAD_DEQ(req, 
&cscanq->right); - } else { - RF_ASSERT(cscanq->right.qlen == 0); - if (cscanq->left.queue == NULL) { - RF_ASSERT(cscanq->left.qlen == 0); - if (cscanq->lopri.queue == NULL) { - RF_ASSERT(cscanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); - if (req == NULL) - return (NULL); - do_dequeue(&cscanq->lopri, req); - } else { - /* - * There's I/Os to the left of the arm. Swing - * on back (swap queues). - */ - cscanq->right = cscanq->left; - cscanq->left.qlen = 0; - cscanq->left.queue = cscanq->left.qtail = NULL; - DO_HEAD_DEQ(req, &cscanq->right); - } - } - RF_ASSERT(req); - cscanq->last_sector = req->sectorOffset; - return (req); -} - -RF_DiskQueueData_t * -rf_SstfPeek(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req; - RF_Sstf_t *sstfq; - - sstfq = (RF_Sstf_t *) qptr; - - if ((sstfq->left.queue == NULL) && (sstfq->right.queue == NULL)) { - req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, &sstfq->dir, - sstfq->allow_reverse); - } else { - if (sstfq->left.queue == NULL) - req = sstfq->right.queue; - else { - if (sstfq->right.queue == NULL) - req = sstfq->left.queue; - else { - if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) - < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { - req = sstfq->right.queue; - } else { - req = sstfq->left.qtail; - } - } - } - } - if (req == NULL) { - RF_ASSERT(QSUM(sstfq) == 0); - } - return (req); -} - -RF_DiskQueueData_t * -rf_ScanPeek(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req; - RF_Sstf_t *scanq; - int dir; - - scanq = (RF_Sstf_t *) qptr; - dir = scanq->dir; - - if (scanq->left.queue == NULL) { - RF_ASSERT(scanq->left.qlen == 0); - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - if (scanq->lopri.queue == NULL) { - RF_ASSERT(scanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &dir, scanq->allow_reverse); - } else { - 
req = scanq->right.queue; - } - } else - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - RF_ASSERT(scanq->left.queue); - req = scanq->left.qtail; - } else { - RF_ASSERT(scanq->right.queue); - RF_ASSERT(scanq->left.queue); - if (scanq->dir == DIR_RIGHT) { - req = scanq->right.queue; - } else { - req = scanq->left.qtail; - } - } - if (req == NULL) { - RF_ASSERT(QSUM(scanq) == 0); - } - return (req); -} - -RF_DiskQueueData_t * -rf_CscanPeek(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req; - RF_Sstf_t *cscanq; - - cscanq = (RF_Sstf_t *) qptr; - - RF_ASSERT(cscanq->dir == DIR_RIGHT); - if (cscanq->right.queue) { - req = cscanq->right.queue; - } else { - RF_ASSERT(cscanq->right.qlen == 0); - if (cscanq->left.queue == NULL) { - RF_ASSERT(cscanq->left.qlen == 0); - if (cscanq->lopri.queue == NULL) { - RF_ASSERT(cscanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); - } else { - /* - * There's I/Os to the left of the arm. We'll end - * up swinging on back. 
- */ - req = cscanq->left.queue; - } - } - if (req == NULL) { - RF_ASSERT(QSUM(cscanq) == 0); - } - return (req); -} - -int -rf_SstfPromote(qptr, parityStripeID, which_ru) - void *qptr; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; -{ - RF_DiskQueueData_t *r, *next; - RF_Sstf_t *sstfq; - int n; - - sstfq = (RF_Sstf_t *) qptr; - - n = 0; - for (r = sstfq->lopri.queue; r; r = next) { - next = r->next; - if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: check promote %lx\n", - r->raidPtr->raidid, (long) r); - } - if ((r->parityStripeID == parityStripeID) - && (r->which_ru == which_ru)) { - do_dequeue(&sstfq->lopri, r); - rf_SstfEnqueue(qptr, r, RF_IO_NORMAL_PRIORITY); - n++; - } - } - if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: promoted %d matching I/Os queues are %d,%d,%d\n", - r->raidPtr->raidid, n, sstfq->left.qlen, - sstfq->right.qlen, sstfq->lopri.qlen); - } - return (n); -} diff --git a/sys/dev/raidframe/rf_sstf.h b/sys/dev/raidframe/rf_sstf.h deleted file mode 100644 index 2fc1c0d..0000000 --- a/sys/dev/raidframe/rf_sstf.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_sstf.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_SSTF_H_ -#define _RF__RF_SSTF_H_ - -#include <dev/raidframe/rf_diskqueue.h> - -typedef struct RF_SstfQ_s { - RF_DiskQueueData_t *queue; - RF_DiskQueueData_t *qtail; - int qlen; -} RF_SstfQ_t; - -typedef struct RF_Sstf_s { - RF_SstfQ_t left; - RF_SstfQ_t right; - RF_SstfQ_t lopri; - RF_SectorNum_t last_sector; - int dir; - int allow_reverse; -} RF_Sstf_t; - -void * -rf_SstfCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void * -rf_ScanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void * -rf_CscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void rf_SstfEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); -RF_DiskQueueData_t *rf_SstfDequeue(void *qptr); -RF_DiskQueueData_t *rf_SstfPeek(void *qptr); -int -rf_SstfPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); -RF_DiskQueueData_t *rf_ScanDequeue(void *qptr); -RF_DiskQueueData_t *rf_ScanPeek(void *qptr); -RF_DiskQueueData_t *rf_CscanDequeue(void *qptr); -RF_DiskQueueData_t *rf_CscanPeek(void *qptr); - -#endif /* !_RF__RF_SSTF_H_ */ diff --git a/sys/dev/raidframe/rf_states.c b/sys/dev/raidframe/rf_states.c deleted file mode 100644 index bc686ec..0000000 --- a/sys/dev/raidframe/rf_states.c +++ /dev/null @@ -1,669 +0,0 @@ -/* $NetBSD: rf_states.c,v 1.15 2000/10/20 02:24:45 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland, William V. Courtright II, Robby Findler - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <sys/errno.h> - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_aselect.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_states.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_kintf.h> - -/* prototypes for some of the available states. - - States must: - - - not block. - - - either schedule rf_ContinueRaidAccess as a callback and return - RF_TRUE, or complete all of their work and return RF_FALSE. - - - increment desc->state when they have finished their work. 
-*/ - -static char * -StateName(RF_AccessState_t state) -{ - switch (state) { - case rf_QuiesceState:return "QuiesceState"; - case rf_MapState: - return "MapState"; - case rf_LockState: - return "LockState"; - case rf_CreateDAGState: - return "CreateDAGState"; - case rf_ExecuteDAGState: - return "ExecuteDAGState"; - case rf_ProcessDAGState: - return "ProcessDAGState"; - case rf_CleanupState: - return "CleanupState"; - case rf_LastState: - return "LastState"; - case rf_IncrAccessesCountState: - return "IncrAccessesCountState"; - case rf_DecrAccessesCountState: - return "DecrAccessesCountState"; - default: - return "!!! UnnamedState !!!"; - } -} - -void -rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc) -{ - int suspended = RF_FALSE; - int current_state_index = desc->state; - RF_AccessState_t current_state = desc->states[current_state_index]; - int unit = desc->raidPtr->raidid; - - do { - - current_state_index = desc->state; - current_state = desc->states[current_state_index]; - - switch (current_state) { - - case rf_QuiesceState: - suspended = rf_State_Quiesce(desc); - break; - case rf_IncrAccessesCountState: - suspended = rf_State_IncrAccessCount(desc); - break; - case rf_MapState: - suspended = rf_State_Map(desc); - break; - case rf_LockState: - suspended = rf_State_Lock(desc); - break; - case rf_CreateDAGState: - suspended = rf_State_CreateDAG(desc); - break; - case rf_ExecuteDAGState: - suspended = rf_State_ExecuteDAG(desc); - break; - case rf_ProcessDAGState: - suspended = rf_State_ProcessDAG(desc); - break; - case rf_CleanupState: - suspended = rf_State_Cleanup(desc); - break; - case rf_DecrAccessesCountState: - suspended = rf_State_DecrAccessCount(desc); - break; - case rf_LastState: - suspended = rf_State_LastState(desc); - break; - } - - /* after this point, we cannot dereference desc since desc may - * have been freed. desc is only freed in LastState, so if we - * renter this function or loop back up, desc should be valid. 
*/ - - if (rf_printStatesDebug) { - printf("raid%d: State: %-24s StateIndex: %3i desc: 0x%ld %s\n", - unit, StateName(current_state), - current_state_index, (long) desc, - suspended ? "callback scheduled" : "looping"); - } - } while (!suspended && current_state != rf_LastState); - - return; -} - - -void -rf_ContinueDagAccess(RF_DagList_t * dagList) -{ - RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec); - RF_RaidAccessDesc_t *desc; - RF_DagHeader_t *dag_h; - RF_Etimer_t timer; - int i; - - desc = dagList->desc; - - timer = tracerec->timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer); - RF_ETIMER_START(tracerec->timer); - - /* skip to dag which just finished */ - dag_h = dagList->dags; - for (i = 0; i < dagList->numDagsDone; i++) { - dag_h = dag_h->next; - } - - /* check to see if retry is required */ - if (dag_h->status == rf_rollBackward) { - /* when a dag fails, mark desc status as bad and allow all - * other dags in the desc to execute to completion. then, - * free all dags and start over */ - desc->status = 1; /* bad status */ - { - printf("raid%d: DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n", - desc->raidPtr->raidid, desc->type, - (long) desc->raidAddress, - (long) desc->raidAddress, (int) desc->numBlocks, - (int) desc->numBlocks, - (unsigned long) (desc->bufPtr)); - } - } - dagList->numDagsDone++; - rf_ContinueRaidAccess(desc); -} - -int -rf_State_LastState(RF_RaidAccessDesc_t * desc) -{ - void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc; - RF_CBParam_t callbackArg; - - callbackArg.p = desc->callbackArg; - - /* - * If this is not an async request, wake up the caller - */ - if (desc->async_flag == 0) - wakeup(desc->bp); - - /* - * That's all the IO for this one... unbusy the 'disk'. - */ - - rf_disk_unbusy(desc); - - /* - * Wakeup any requests waiting to go. 
- */ - - RF_LOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex); - ((RF_Raid_t *) desc->raidPtr)->openings++; - RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex); - - /* wake up any pending IO */ - raidstart(((RF_Raid_t *) desc->raidPtr)); - - /* printf("Calling biodone on 0x%x\n",desc->bp); */ - biodone(desc->bp); /* access came through ioctl */ - - if (callbackFunc) - callbackFunc(callbackArg); - rf_FreeRaidAccDesc(desc); - - return RF_FALSE; -} - -int -rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr; - - raidPtr = desc->raidPtr; - /* Bummer. We have to do this to be 100% safe w.r.t. the increment - * below */ - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accs_in_flight++; /* used to detect quiescence */ - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - desc->state++; - return RF_FALSE; -} - -int -rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr; - - raidPtr = desc->raidPtr; - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accs_in_flight--; - if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) { - rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc); - } - rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks); - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - desc->state++; - return RF_FALSE; -} - -int -rf_State_Quiesce(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - int suspended = RF_FALSE; - RF_Raid_t *raidPtr; - - raidPtr = desc->raidPtr; - - RF_ETIMER_START(timer); - RF_ETIMER_START(desc->timer); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - if (raidPtr->accesses_suspended) { - RF_CallbackDesc_t *cb; - cb = rf_AllocCallbackDesc(); - /* XXX the following cast is quite bogus... - * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *) as an - * argument.. 
GO */ - cb->callbackFunc = (void (*) (RF_CBParam_t)) rf_ContinueRaidAccess; - cb->callbackArg.p = (void *) desc; - cb->next = raidPtr->quiesce_wait_list; - raidPtr->quiesce_wait_list = cb; - suspended = RF_TRUE; - } - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer); - - if (suspended && rf_quiesceDebug) - printf("Stalling access due to quiescence lock\n"); - - desc->state++; - return suspended; -} - -int -rf_State_Map(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - - if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks, - desc->bufPtr, RF_DONT_REMAP))) - RF_PANIC(); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer); - - desc->state++; - return RF_FALSE; -} - -int -rf_State_Lock(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_AccessStripeMap_t *asm_p; - RF_Etimer_t timer; - int suspended = RF_FALSE; - - RF_ETIMER_START(timer); - if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { - RF_StripeNum_t lastStripeID = -1; - - /* acquire each lock that we don't already hold */ - for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { - RF_ASSERT(RF_IO_IS_R_OR_W(desc->type)); - if (!rf_suppressLocksAndLargeWrites && - asm_p->parityInfo && - !(desc->flags & RF_DAG_SUPPRESS_LOCKS) && - !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) { - asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED; - RF_ASSERT(asm_p->stripeID > lastStripeID); - - /* locks must be acquired hierarchically */ - - lastStripeID = asm_p->stripeID; - /* XXX the cast to (void (*)(RF_CBParam_t)) - * below is bogus! 
GO */ - RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, - desc->type, - (void (*) (RF_Buf_t)) rf_ContinueRaidAccess, - desc, asm_p, - raidPtr->Layout.dataSectorsPerStripe); - if (rf_AcquireStripeLock(raidPtr->lockTable, - asm_p->stripeID, &asm_p->lockReqDesc)) { - suspended = RF_TRUE; - break; - } - } - if (desc->type == RF_IO_TYPE_WRITE && - raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing) { - if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) { - int val; - - asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED; - /* XXX the cast below is quite - * bogus!!! XXX GO */ - val = rf_ForceOrBlockRecon(raidPtr, asm_p, - (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess, desc); - if (val == 0) { - asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED; - } else { - suspended = RF_TRUE; - break; - } - } else { - if (rf_pssDebug) { - printf("raid%d: skipping force/block because already done, psid %ld\n", - desc->raidPtr->raidid, - (long) asm_p->stripeID); - } - } - } else { - if (rf_pssDebug) { - printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n", - desc->raidPtr->raidid, - (long) asm_p->stripeID); - } - } - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); - - if (suspended) - return (RF_TRUE); - } - desc->state++; - return (RF_FALSE); -} -/* - * the following three states create, execute, and post-process dags - * the error recovery unit is a single dag. 
- * by default, SelectAlgorithm creates an array of dags, one per parity stripe - * in some tricky cases, multiple dags per stripe are created - * - dags within a parity stripe are executed sequentially (arbitrary order) - * - dags for distinct parity stripes are executed concurrently - * - * repeat until all dags complete successfully -or- dag selection fails - * - * while !done - * create dag(s) (SelectAlgorithm) - * if dag - * execute dag (DispatchDAG) - * if dag successful - * done (SUCCESS) - * else - * !done (RETRY - start over with new dags) - * else - * done (FAIL) - */ -int -rf_State_CreateDAG(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - RF_DagHeader_t *dag_h; - int i, selectStatus; - - /* generate a dag for the access, and fire it off. When the dag - * completes, we'll get re-invoked in the next state. */ - RF_ETIMER_START(timer); - /* SelectAlgorithm returns one or more dags */ - selectStatus = rf_SelectAlgorithm(desc, desc->flags | RF_DAG_SUPPRESS_LOCKS); - if (rf_printDAGsDebug) - for (i = 0; i < desc->numStripes; i++) - rf_PrintDAGList(desc->dagArray[i].dags); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - /* update time to create all dags */ - tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer); - - desc->status = 0; /* good status */ - - if (selectStatus) { - /* failed to create a dag */ - /* this happens when there are too many faults or incomplete - * dag libraries */ - printf("[Failed to create a DAG]\n"); - RF_PANIC(); - } else { - /* bind dags to desc */ - for (i = 0; i < desc->numStripes; i++) { - dag_h = desc->dagArray[i].dags; - while (dag_h) { - dag_h->bp = (RF_Buf_t) desc->bp; - dag_h->tracerec = tracerec; - dag_h = dag_h->next; - } - } - desc->flags |= RF_DAG_DISPATCH_RETURNED; - desc->state++; /* next state should be rf_State_ExecuteDAG */ - } - return RF_FALSE; -} - - - -/* the access has an array of dagLists, one dagList per parity stripe. 
- * fire the first dag in each parity stripe (dagList). - * dags within a stripe (dagList) must be executed sequentially - * - this preserves atomic parity update - * dags for independents parity groups (stripes) are fired concurrently */ - -int -rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc) -{ - int i; - RF_DagHeader_t *dag_h; - RF_DagList_t *dagArray = desc->dagArray; - - /* next state is always rf_State_ProcessDAG important to do this - * before firing the first dag (it may finish before we leave this - * routine) */ - desc->state++; - - /* sweep dag array, a stripe at a time, firing the first dag in each - * stripe */ - for (i = 0; i < desc->numStripes; i++) { - RF_ASSERT(dagArray[i].numDags > 0); - RF_ASSERT(dagArray[i].numDagsDone == 0); - RF_ASSERT(dagArray[i].numDagsFired == 0); - RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire first dag in this stripe */ - dag_h = dagArray[i].dags; - RF_ASSERT(dag_h); - dagArray[i].numDagsFired++; - /* XXX Yet another case where we pass in a conflicting - * function pointer :-( XXX GO */ - rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, &dagArray[i]); - } - - /* the DAG will always call the callback, even if there was no - * blocking, so we are always suspended in this state */ - return RF_TRUE; -} - - - -/* rf_State_ProcessDAG is entered when a dag completes. 
- * first, check to all dags in the access have completed - * if not, fire as many dags as possible */ - -int -rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc) -{ - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_DagHeader_t *dag_h; - int i, j, done = RF_TRUE; - RF_DagList_t *dagArray = desc->dagArray; - RF_Etimer_t timer; - - /* check to see if this is the last dag */ - for (i = 0; i < desc->numStripes; i++) - if (dagArray[i].numDags != dagArray[i].numDagsDone) - done = RF_FALSE; - - if (done) { - if (desc->status) { - /* a dag failed, retry */ - RF_ETIMER_START(timer); - /* free all dags */ - for (i = 0; i < desc->numStripes; i++) { - rf_FreeDAG(desc->dagArray[i].dags); - } - rf_MarkFailuresInASMList(raidPtr, asmh); - /* back up to rf_State_CreateDAG */ - desc->state = desc->state - 2; - return RF_FALSE; - } else { - /* move on to rf_State_Cleanup */ - desc->state++; - } - return RF_FALSE; - } else { - /* more dags to execute */ - /* see if any are ready to be fired. if so, fire them */ - /* don't fire the initial dag in a list, it's fired in - * rf_State_ExecuteDAG */ - for (i = 0; i < desc->numStripes; i++) { - if ((dagArray[i].numDagsDone < dagArray[i].numDags) - && (dagArray[i].numDagsDone == dagArray[i].numDagsFired) - && (dagArray[i].numDagsFired > 0)) { - RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire next dag in this stripe */ - /* first, skip to next dag awaiting execution */ - dag_h = dagArray[i].dags; - for (j = 0; j < dagArray[i].numDagsDone; j++) - dag_h = dag_h->next; - dagArray[i].numDagsFired++; - /* XXX and again we pass a different function - * pointer.. 
GO */ - rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, - &dagArray[i]); - } - } - return RF_TRUE; - } -} -/* only make it this far if all dags complete successfully */ -int -rf_State_Cleanup(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccessStripeMap_t *asm_p; - RF_DagHeader_t *dag_h; - RF_Etimer_t timer; - int i; - - desc->state++; - - timer = tracerec->timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer); - - /* the RAID I/O is complete. Clean up. */ - tracerec->specific.user.dag_retry_us = 0; - - RF_ETIMER_START(timer); - if (desc->flags & RF_DAG_RETURN_DAG) { - /* copy dags into paramDAG */ - *(desc->paramDAG) = desc->dagArray[0].dags; - dag_h = *(desc->paramDAG); - for (i = 1; i < desc->numStripes; i++) { - /* concatenate dags from remaining stripes */ - RF_ASSERT(dag_h); - while (dag_h->next) - dag_h = dag_h->next; - dag_h->next = desc->dagArray[i].dags; - } - } else { - /* free all dags */ - for (i = 0; i < desc->numStripes; i++) { - rf_FreeDAG(desc->dagArray[i].dags); - } - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer); - - RF_ETIMER_START(timer); - if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { - for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { - if (!rf_suppressLocksAndLargeWrites && - asm_p->parityInfo && - !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) { - RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc); - rf_ReleaseStripeLock(raidPtr->lockTable, - asm_p->stripeID, - &asm_p->lockReqDesc); - } - if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) { - rf_UnblockRecon(raidPtr, asm_p); - } - } - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); - - RF_ETIMER_START(timer); - if (desc->flags & 
RF_DAG_RETURN_ASM) - *(desc->paramASM) = asmh; - else - rf_FreeAccessStripeMap(asmh); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer); - - RF_ETIMER_STOP(desc->timer); - RF_ETIMER_EVAL(desc->timer); - - timer = desc->tracerec.tot_timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - desc->tracerec.total_us = RF_ETIMER_VAL_US(timer); - - rf_LogTraceRec(raidPtr, tracerec); - - desc->flags |= RF_DAG_ACCESS_COMPLETE; - - return RF_FALSE; -} diff --git a/sys/dev/raidframe/rf_states.h b/sys/dev/raidframe/rf_states.h deleted file mode 100644 index 6c0aee4..0000000 --- a/sys/dev/raidframe/rf_states.h +++ /dev/null @@ -1,48 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_states.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II, Robby Findler - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -#ifndef _RF__RF_STATES_H_ -#define _RF__RF_STATES_H_ - -#include <dev/raidframe/rf_types.h> - -void rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc); -void rf_ContinueDagAccess(RF_DagList_t * dagList); -int rf_State_LastState(RF_RaidAccessDesc_t * desc); -int rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc); -int rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc); -int rf_State_Quiesce(RF_RaidAccessDesc_t * desc); -int rf_State_Map(RF_RaidAccessDesc_t * desc); -int rf_State_Lock(RF_RaidAccessDesc_t * desc); -int rf_State_CreateDAG(RF_RaidAccessDesc_t * desc); -int rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc); -int rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc); -int rf_State_Cleanup(RF_RaidAccessDesc_t * desc); - -#endif /* !_RF__RF_STATES_H_ */ diff --git a/sys/dev/raidframe/rf_stripelocks.c b/sys/dev/raidframe/rf_stripelocks.c deleted file mode 100644 index 409c0f4..0000000 --- a/sys/dev/raidframe/rf_stripelocks.c +++ /dev/null @@ -1,669 +0,0 @@ -/* $NetBSD: rf_stripelocks.c,v 1.6 2000/12/04 11:35:46 fvdl Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * stripelocks.c -- code to lock stripes for read and write access - * - * The code distinguishes between read locks and write locks. There can be - * as many readers to a given stripe as desired. When a write request comes - * in, no further readers are allowed to enter, and all subsequent requests - * are queued in FIFO order. When the number of readers goes to zero, the - * writer is given the lock. When a writer releases the lock, the list of - * queued requests is scanned, and all readers up to the next writer are - * given the lock. - * - * The lock table size must be a power of two, but HASH_STRIPEID - * is the only function that requires this. - * - * The code now supports "range locks". When you ask to lock a stripe, you - * specify a range of addresses in that stripe that you want to lock. When - * you acquire the lock, you've locked only this range of addresses, and - * other threads can concurrently read/write any non-overlapping portions - * of the stripe. The "addresses" that you lock are abstract in that you - * can pass in anything you like. The expectation is that you'll pass in - * the range of physical disk offsets of the parity bits you're planning - * to update. The idea behind this, of course, is to allow sub-stripe - * locking. The implementation is perhaps not the best imaginable; in the - * worst case a lock release is O(n^2) in the total number of outstanding - * requests to a given stripe. Note that if you're striping with a - * stripe unit size equal to an entire disk (i.e.
not striping), there will - * be only one stripe and you may spend some significant number of cycles - * searching through stripe lock descriptors. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_shutdown.h> - -#define Dprintf1(s,a) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#define Dprintf6(s,a,b,c,d,e,f) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL) -#define Dprintf7(s,a,b,c,d,e,f,g) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) -#define Dprintf8(s,a,b,c,d,e,f,g,h) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),(void 
*)((unsigned long)h)) - -#define FLUSH - -#define HASH_STRIPEID(_sid_) ( (_sid_) & (rf_lockTableSize-1) ) - -static void AddToWaitersQueue(RF_LockTableEntry_t * lockTable, RF_StripeLockDesc_t * lockDesc, RF_LockReqDesc_t * lockReqDesc); -static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID); -static void FreeStripeLockDesc(RF_StripeLockDesc_t * p); -static void PrintLockedStripes(RF_LockTableEntry_t * lockTable); - -/* determines if two ranges overlap. always yields false if either start value is negative */ -#define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2) \ - ( (_strt1 >= 0) && (_strt2 >= 0) && (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2)) ) - -/* determines if any of the ranges specified in the two lock descriptors overlap each other */ -#define RANGE_OVERLAP(_cand, _pred) \ - ( SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start, (_pred)->stop ) || \ - SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start, (_pred)->stop ) || \ - SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start2, (_pred)->stop2) || \ - SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start2, (_pred)->stop2) ) - -/* Determines if a candidate lock request conflicts with a predecessor lock req. - * Note that the arguments are not interchangeable. 
- * The rules are: - * a candidate read conflicts with a predecessor write if any ranges overlap - * a candidate write conflicts with a predecessor read if any ranges overlap - * a candidate write conflicts with a predecessor write if any ranges overlap - */ -#define STRIPELOCK_CONFLICT(_cand, _pred) \ - RANGE_OVERLAP((_cand), (_pred)) && \ - ( ( (((_cand)->type == RF_IO_TYPE_READ) && ((_pred)->type == RF_IO_TYPE_WRITE)) || \ - (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_READ)) || \ - (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_WRITE)) \ - ) \ - ) - -static RF_FreeList_t *rf_stripelock_freelist; -#define RF_MAX_FREE_STRIPELOCK 128 -#define RF_STRIPELOCK_INC 8 -#define RF_STRIPELOCK_INITIAL 32 - -static void rf_ShutdownStripeLockFreeList(void *); -static void rf_RaidShutdownStripeLocks(void *); - -static void -rf_ShutdownStripeLockFreeList(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_stripelock_freelist, next, (RF_StripeLockDesc_t *)); -} - -int -rf_ConfigureStripeLockFreeList(listp) - RF_ShutdownList_t **listp; -{ - unsigned mask; - int rc; - - RF_FREELIST_CREATE(rf_stripelock_freelist, RF_MAX_FREE_STRIPELOCK, - RF_STRIPELOCK_INITIAL, sizeof(RF_StripeLockDesc_t)); - rc = rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownStripeLockFreeList(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_stripelock_freelist, RF_STRIPELOCK_INITIAL, next, - (RF_StripeLockDesc_t *)); - for (mask = 0x1; mask; mask <<= 1) - if (rf_lockTableSize == mask) - break; - if (!mask) { - printf("[WARNING: lock table size must be a power of two. 
Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE); - rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE; - } - return (0); -} - -RF_LockTableEntry_t * -rf_MakeLockTable() -{ - RF_LockTableEntry_t *lockTable; - int i, rc; - - RF_Calloc(lockTable, ((int) rf_lockTableSize), sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *)); - if (lockTable == NULL) - return (NULL); - for (i = 0; i < rf_lockTableSize; i++) { - rc = rf_mutex_init(&lockTable[i].mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - /* XXX clean up other mutexes */ - return (NULL); - } - } - return (lockTable); -} - -void -rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable) -{ - int i; - - if (rf_stripeLockDebug) { - PrintLockedStripes(lockTable); - } - for (i = 0; i < rf_lockTableSize; i++) { - rf_mutex_destroy(&lockTable[i].mutex); - } - RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t)); -} - -static void -rf_RaidShutdownStripeLocks(arg) - void *arg; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) arg; - rf_ShutdownStripeLocks(raidPtr->lockTable); -} - -int -rf_ConfigureStripeLocks( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - raidPtr->lockTable = rf_MakeLockTable(); - if (raidPtr->lockTable == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownStripeLocks(raidPtr->lockTable); - return (rc); - } - return (0); -} -/* returns 0 if you've got the lock, and non-zero if you have to wait. - * if and only if you have to wait, we'll cause cbFunc to get invoked - * with cbArg when you are granted the lock. We store a tag in *releaseTag - * that you need to give back to us when you release the lock. 
- */ -int -rf_AcquireStripeLock( - RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t * lockReqDesc) -{ - RF_StripeLockDesc_t *lockDesc; - RF_LockReqDesc_t *p; - int tid = 0, hashval = HASH_STRIPEID(stripeID); - int retcode = 0; - - RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); - - if (rf_stripeLockDebug) { - if (stripeID == -1) - Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n", tid); - else { - Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n", - tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start, - lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - Dprintf3("[%d] lock %ld hashval %d\n", tid, stripeID, hashval); - FLUSH; - } - } - if (stripeID == -1) - return (0); - lockReqDesc->next = NULL; /* just to be sure */ - - RF_LOCK_MUTEX(lockTable[hashval].mutex); - for (lockDesc = lockTable[hashval].descList; lockDesc; lockDesc = lockDesc->next) { - if (lockDesc->stripeID == stripeID) - break; - } - - if (!lockDesc) { /* no entry in table => no one reading or - * writing */ - lockDesc = AllocStripeLockDesc(stripeID); - lockDesc->next = lockTable[hashval].descList; - lockTable[hashval].descList = lockDesc; - if (lockReqDesc->type == RF_IO_TYPE_WRITE) - lockDesc->nWriters++; - lockDesc->granted = lockReqDesc; - if (rf_stripeLockDebug) { - Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - FLUSH; - } - } else { - - if (lockReqDesc->type == RF_IO_TYPE_WRITE) - lockDesc->nWriters++; - - if (lockDesc->nWriters == 0) { /* no need to search any lists - * if there are no writers - * anywhere */ - lockReqDesc->next = lockDesc->granted; - lockDesc->granted = lockReqDesc; - if (rf_stripeLockDebug) { - Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, 
lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - FLUSH; - } - } else { - - /* search the granted & waiting lists for a conflict. - * stop searching as soon as we find one */ - retcode = 0; - for (p = lockDesc->granted; p; p = p->next) - if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { - retcode = 1; - break; - } - if (!retcode) - for (p = lockDesc->waitersH; p; p = p->next) - if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { - retcode = 2; - break; - } - if (!retcode) { - lockReqDesc->next = lockDesc->granted; /* no conflicts found => - * grant lock */ - lockDesc->granted = lockReqDesc; - if (rf_stripeLockDebug) { - Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, - lockReqDesc->start2, lockReqDesc->stop2); - FLUSH; - } - } else { - if (rf_stripeLockDebug) { - Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, - hashval); - Dprintf3("[%d] lock %ld retcode=%d\n", tid, stripeID, retcode); - FLUSH; - } - AddToWaitersQueue(lockTable, lockDesc, lockReqDesc); /* conflict => the - * current access must - * wait */ - } - } - } - - RF_UNLOCK_MUTEX(lockTable[hashval].mutex); - return (retcode); -} - -void -rf_ReleaseStripeLock( - RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t * lockReqDesc) -{ - RF_StripeLockDesc_t *lockDesc, *ld_t; - RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t; - RF_IoType_t type = lockReqDesc->type; - int tid = 0, hashval = HASH_STRIPEID(stripeID); - int release_it, consider_it; - RF_LockReqDesc_t *candidate, *candidate_t, *predecessor; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (rf_stripeLockDebug) { - if (stripeID == -1) - Dprintf1("[%d] Lock release supressed (stripeID == -1)\n", tid); - else { - Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 
0x%lx\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable); - FLUSH; - } - } - if (stripeID == -1) - return; - - RF_LOCK_MUTEX(lockTable[hashval].mutex); - - /* find the stripe lock descriptor */ - for (ld_t = NULL, lockDesc = lockTable[hashval].descList; lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) { - if (lockDesc->stripeID == stripeID) - break; - } - RF_ASSERT(lockDesc); /* major error to release a lock that doesn't - * exist */ - - /* find the stripe lock request descriptor & delete it from the list */ - for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next) - if (lr == lockReqDesc) - break; - - RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a - * lock that hasn't been - * granted */ - if (lr_t) - lr_t->next = lr->next; - else { - RF_ASSERT(lr == lockDesc->granted); - lockDesc->granted = lr->next; - } - lr->next = NULL; - - if (lockReqDesc->type == RF_IO_TYPE_WRITE) - lockDesc->nWriters--; - - /* search through the waiters list to see if anyone needs to be woken - * up. for each such descriptor in the wait list, we check it against - * everything granted and against everything _in front_ of it in the - * waiters queue. If it conflicts with none of these, we release it. - * - * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST HERE. - * This will roach the case where the callback tries to acquire a new - * lock in the same stripe. There are some asserts to try and detect - * this. - * - * We apply 2 performance optimizations: (1) if releasing this lock - * results in no more writers to this stripe, we just release - * everybody waiting, since we place no restrictions on the number of - * concurrent reads. (2) we consider as candidates for wakeup only - * those waiters that have a range overlap with either the descriptor - * being woken up or with something in the callbacklist (i.e. - * something we've just now woken up). 
This allows us to avoid the - * long evaluation for some descriptors. */ - - callbacklist = NULL; - if (lockDesc->nWriters == 0) { /* performance tweak (1) */ - while (lockDesc->waitersH) { - - lr = lockDesc->waitersH; /* delete from waiters - * list */ - lockDesc->waitersH = lr->next; - - RF_ASSERT(lr->type == RF_IO_TYPE_READ); - - lr->next = lockDesc->granted; /* add to granted list */ - lockDesc->granted = lr; - - RF_ASSERT(!lr->templink); - lr->templink = callbacklist; /* put on callback list - * so that we'll invoke - * callback below */ - callbacklist = lr; - if (rf_stripeLockDebug) { - Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable); - FLUSH; - } - } - lockDesc->waitersT = NULL; /* we've purged the whole - * waiters list */ - - } else - for (candidate_t = NULL, candidate = lockDesc->waitersH; candidate;) { - - /* performance tweak (2) */ - consider_it = 0; - if (RANGE_OVERLAP(lockReqDesc, candidate)) - consider_it = 1; - else - for (t = callbacklist; t; t = t->templink) - if (RANGE_OVERLAP(t, candidate)) { - consider_it = 1; - break; - } - if (!consider_it) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - candidate_t = candidate; - candidate = candidate->next; - continue; - } - /* we have a candidate for release. 
check to make - * sure it is not blocked by any granted locks */ - release_it = 1; - for (predecessor = lockDesc->granted; predecessor; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - release_it = 0; - break; - } - } - - /* now check to see if the candidate is blocked by any - * waiters that occur before it it the wait queue */ - if (release_it) - for (predecessor = lockDesc->waitersH; predecessor != candidate; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - release_it = 0; - break; - } - } - - /* release it if indicated */ - if (release_it) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - if (candidate_t) { - candidate_t->next = candidate->next; - if (lockDesc->waitersT == candidate) - lockDesc->waitersT = candidate_t; /* cannot be waitersH - * since candidate_t is - * not NULL */ - } else { - RF_ASSERT(candidate == lockDesc->waitersH); - lockDesc->waitersH = lockDesc->waitersH->next; - if (!lockDesc->waitersH) - lockDesc->waitersT = NULL; - } - candidate->next = lockDesc->granted; /* move it to the - * granted list */ - lockDesc->granted = candidate; - - 
RF_ASSERT(!candidate->templink); - candidate->templink = callbacklist; /* put it on the list of - * things to be called - * after we release the - * mutex */ - callbacklist = candidate; - - if (!candidate_t) - candidate = lockDesc->waitersH; - else - candidate = candidate_t->next; /* continue with the - * rest of the list */ - } else { - candidate_t = candidate; - candidate = candidate->next; /* continue with the - * rest of the list */ - } - } - - /* delete the descriptor if no one is waiting or active */ - if (!lockDesc->granted && !lockDesc->waitersH) { - RF_ASSERT(lockDesc->nWriters == 0); - if (rf_stripeLockDebug) { - Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n", tid, (unsigned long) lockTable, stripeID); - FLUSH; - } - if (ld_t) - ld_t->next = lockDesc->next; - else { - RF_ASSERT(lockDesc == lockTable[hashval].descList); - lockTable[hashval].descList = lockDesc->next; - } - FreeStripeLockDesc(lockDesc); - lockDesc = NULL;/* only for the ASSERT below */ - } - RF_UNLOCK_MUTEX(lockTable[hashval].mutex); - - /* now that we've unlocked the mutex, invoke the callback on all the - * descriptors in the list */ - RF_ASSERT(!((callbacklist) && (!lockDesc))); /* if we deleted the - * descriptor, we should - * have no callbacks to - * do */ - for (candidate = callbacklist; candidate;) { - t = candidate; - candidate = candidate->templink; - t->templink = NULL; - (t->cbFunc) (t->cbArg); - } -} -/* must have the indicated lock table mutex upon entry */ -static void -AddToWaitersQueue( - RF_LockTableEntry_t * lockTable, - RF_StripeLockDesc_t * lockDesc, - RF_LockReqDesc_t * lockReqDesc) -{ -#if 0 /* XXX fvdl -- unitialized use of 'tid' */ - int tid; - - if (rf_stripeLockDebug) { - Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx\n", tid, lockDesc->stripeID, (unsigned long) lockTable); - FLUSH; - } -#endif - if (!lockDesc->waitersH) { - lockDesc->waitersH = lockDesc->waitersT = lockReqDesc; - } else { - lockDesc->waitersT->next = 
lockReqDesc; - lockDesc->waitersT = lockReqDesc; - } -} - -static RF_StripeLockDesc_t * -AllocStripeLockDesc(RF_StripeNum_t stripeID) -{ - RF_StripeLockDesc_t *p; - - RF_FREELIST_GET(rf_stripelock_freelist, p, next, (RF_StripeLockDesc_t *)); - if (p) { - p->stripeID = stripeID; - } - return (p); -} - -static void -FreeStripeLockDesc(RF_StripeLockDesc_t * p) -{ - RF_FREELIST_FREE(rf_stripelock_freelist, p, next); -} - -static void -PrintLockedStripes(lockTable) - RF_LockTableEntry_t *lockTable; -{ - int i, j, foundone = 0, did; - RF_StripeLockDesc_t *p; - RF_LockReqDesc_t *q; - - RF_LOCK_MUTEX(rf_printf_mutex); - printf("Locked stripes:\n"); - for (i = 0; i < rf_lockTableSize; i++) - if (lockTable[i].descList) { - foundone = 1; - for (p = lockTable[i].descList; p; p = p->next) { - printf("Stripe ID 0x%lx (%d) nWriters %d\n", - (long) p->stripeID, (int) p->stripeID, p->nWriters); - - if (!(p->granted)) - printf("Granted: (none)\n"); - else - printf("Granted:\n"); - for (did = 1, j = 0, q = p->granted; q; j++, q = q->next) { - printf(" %c(%ld-%ld", q->type, (long) q->start, (long) q->stop); - if (q->start2 != -1) - printf(",%ld-%ld) ", (long) q->start2, - (long) q->stop2); - else - printf(") "); - if (j && !(j % 4)) { - printf("\n"); - did = 1; - } else - did = 0; - } - if (!did) - printf("\n"); - - if (!(p->waitersH)) - printf("Waiting: (none)\n"); - else - printf("Waiting:\n"); - for (did = 1, j = 0, q = p->waitersH; q; j++, q = q->next) { - printf("%c(%ld-%ld", q->type, (long) q->start, (long) q->stop); - if (q->start2 != -1) - printf(",%ld-%ld) ", (long) q->start2, (long) q->stop2); - else - printf(") "); - if (j && !(j % 4)) { - printf("\n "); - did = 1; - } else - did = 0; - } - if (!did) - printf("\n"); - } - } - if (!foundone) - printf("(none)\n"); - else - printf("\n"); - RF_UNLOCK_MUTEX(rf_printf_mutex); -} diff --git a/sys/dev/raidframe/rf_stripelocks.h b/sys/dev/raidframe/rf_stripelocks.h deleted file mode 100644 index ab960c1..0000000 --- 
a/sys/dev/raidframe/rf_stripelocks.h +++ /dev/null @@ -1,130 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_stripelocks.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * stripelocks.h -- header file for locking stripes - * - * Note that these functions are called from the execution routines of certain - * DAG Nodes, and so they must be NON-BLOCKING to assure maximum parallelism - * in the DAG. Accordingly, when a node wants to acquire a lock, it calls - * AcquireStripeLock, supplying a pointer to a callback function. If the lock - * is free at the time of the call, 0 is returned, indicating that the lock - * has been acquired. If the lock is not free, 1 is returned, and a copy of - * the function pointer and argument are held in the lock table. When the - * lock becomes free, the callback function is invoked. 
- * - *****************************************************************************/ - -#ifndef _RF__RF_STRIPELOCKS_H_ -#define _RF__RF_STRIPELOCKS_H_ - -#if defined(__FreeBSD__) -#include <sys/types.h> -#if __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#if _KERNEL -#include <sys/systm.h> -#endif -#endif -#include <sys/buf.h> - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_general.h> - -struct RF_LockReqDesc_s { - RF_IoType_t type; /* read or write */ - RF_int64 start, stop; /* start and end of range to be locked */ - RF_int64 start2, stop2; /* start and end of 2nd range to be locked */ - void (*cbFunc) (RF_Buf_t); /* callback function */ - void *cbArg; /* argument to callback function */ - RF_LockReqDesc_t *next; /* next element in chain */ - RF_LockReqDesc_t *templink; /* for making short-lived lists of - * request descriptors */ -}; -#define RF_ASSERT_VALID_LOCKREQ(_lr_) { \ - RF_ASSERT(RF_IO_IS_R_OR_W((_lr_)->type)); \ -} - -struct RF_StripeLockDesc_s { - RF_StripeNum_t stripeID;/* the stripe ID */ - RF_LockReqDesc_t *granted; /* unordered list of granted requests */ - RF_LockReqDesc_t *waitersH; /* FIFO queue of all waiting reqs, - * both read and write (Head and Tail) */ - RF_LockReqDesc_t *waitersT; - int nWriters; /* number of writers either granted or waiting */ - RF_StripeLockDesc_t *next; /* for hash table collision resolution */ -}; - -struct RF_LockTableEntry_s { - RF_DECLARE_MUTEX(mutex) /* mutex on this hash chain */ - RF_StripeLockDesc_t *descList; /* hash chain of lock descriptors */ -}; -/* - * Initializes a stripe lock descriptor. _defSize is the number of sectors - * that we lock when there is no parity information in the ASM (e.g. RAID0). 
- */ - -#define RF_INIT_LOCK_REQ_DESC(_lrd, _typ, _cbf, _cba, _asm, _defSize) \ - { \ - (_lrd).type = _typ; \ - (_lrd).start2 = -1; \ - (_lrd).stop2 = -1; \ - if ((_asm)->parityInfo) { \ - (_lrd).start = (_asm)->parityInfo->startSector; \ - (_lrd).stop = (_asm)->parityInfo->startSector + (_asm)->parityInfo->numSector-1; \ - if ((_asm)->parityInfo->next) { \ - (_lrd).start2 = (_asm)->parityInfo->next->startSector; \ - (_lrd).stop2 = (_asm)->parityInfo->next->startSector + (_asm)->parityInfo->next->numSector-1; \ - } \ - } else { \ - (_lrd).start = 0; \ - (_lrd).stop = (_defSize); \ - } \ - (_lrd).templink= NULL; \ - (_lrd).cbFunc = (_cbf); \ - (_lrd).cbArg = (void *) (_cba); \ - } - -int rf_ConfigureStripeLockFreeList(RF_ShutdownList_t ** listp); -RF_LockTableEntry_t *rf_MakeLockTable(void); -void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable); -int -rf_ConfigureStripeLocks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_AcquireStripeLock(RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc); -void -rf_ReleaseStripeLock(RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc); - -#endif /* !_RF__RF_STRIPELOCKS_H_ */ diff --git a/sys/dev/raidframe/rf_strutils.c b/sys/dev/raidframe/rf_strutils.c deleted file mode 100644 index d434f0a..0000000 --- a/sys/dev/raidframe/rf_strutils.c +++ /dev/null @@ -1,58 +0,0 @@ -/* $NetBSD: rf_strutils.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_strutils.c - * - * String-parsing funcs - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * rf_strutils.c -- some simple utilities for munging on strings. - * I put them in a file by themselves because they're needed in - * setconfig, in the user-level driver, and in the kernel. - * - */ - -#include <dev/raidframe/rf_utils.h> - -/* finds a non-white character in the line */ -char * -rf_find_non_white(char *p) -{ - for (; *p != '\0' && (*p == ' ' || *p == '\t'); p++); - return (p); -} -/* finds a white character in the line */ -char * -rf_find_white(char *p) -{ - for (; *p != '\0' && (*p != ' ' && *p != '\t'); p++); - return (p); -} diff --git a/sys/dev/raidframe/rf_threadstuff.c b/sys/dev/raidframe/rf_threadstuff.c deleted file mode 100644 index 657ffee..0000000 --- a/sys/dev/raidframe/rf_threadstuff.c +++ /dev/null @@ -1,223 +0,0 @@ -/* $NetBSD: rf_threadstuff.c,v 1.5 1999/12/07 02:13:28 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_threadstuff.c - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_shutdown.h> - -static void mutex_destroyer(void *); -static void cond_destroyer(void *); - -/* - * Shared stuff - */ - -static void -mutex_destroyer(arg) - void *arg; -{ - int rc; - - rc = rf_mutex_destroy(arg); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying mutex\n", rc); - } -} - -static void -cond_destroyer(arg) - void *arg; -{ - int rc; - - rc = rf_cond_destroy(arg); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying condition\n", rc); - } -} - -int -_rf_create_managed_mutex(listp, m, file, line) - RF_ShutdownList_t **listp; -RF_DECLARE_MUTEX(*m) - char *file; - int line; -{ - int rc, rc1; - - rc = rf_mutex_init(m, __FUNCTION__); - if (rc) - return (rc); - rc = _rf_ShutdownCreate(listp, mutex_destroyer, (void *) m, file, line); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); - rc1 = rf_mutex_destroy(m); - if (rc1) { - 
RF_ERRORMSG1("RAIDFRAME: Error %d destroying mutex\n", rc1); - } - } - return (rc); -} - -int -_rf_create_managed_cond(listp, c, file, line) - RF_ShutdownList_t **listp; -RF_DECLARE_COND(*c) - char *file; - int line; -{ - int rc, rc1; - - rc = rf_cond_init(c); - if (rc) - return (rc); - rc = _rf_ShutdownCreate(listp, cond_destroyer, (void *) c, file, line); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); - rc1 = rf_cond_destroy(c); - if (rc1) { - RF_ERRORMSG1("RAIDFRAME: Error %d destroying cond\n", rc1); - } - } - return (rc); -} - -int -_rf_init_managed_threadgroup(listp, g, file, line) - RF_ShutdownList_t **listp; - RF_ThreadGroup_t *g; - char *file; - int line; -{ - int rc; - - rc = _rf_create_managed_mutex(listp, &g->mutex, file, line); - if (rc) - return (rc); - rc = _rf_create_managed_cond(listp, &g->cond, file, line); - if (rc) - return (rc); - g->created = g->running = g->shutdown = 0; - return (0); -} - -int -_rf_destroy_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; -{ - int rc1, rc2; - - rc1 = rf_mutex_destroy(&g->mutex); - rc2 = rf_cond_destroy(&g->cond); - if (rc1) - return (rc1); - return (rc2); -} - -int -_rf_init_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; -{ - int rc; - - rc = rf_mutex_init(&g->mutex, __FUNCTION__); - if (rc) - return (rc); - rc = rf_cond_init(&g->cond); - if (rc) { - rf_mutex_destroy(&g->mutex); - return (rc); - } - g->created = g->running = g->shutdown = 0; - return (0); -} - - -/* - * Kernel - */ -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -int -rf_mutex_init(m, s) -decl_simple_lock_data(, *m) -const char *s; -{ - mtx_init(m, s, NULL, MTX_DEF); - return (0); -} - -int -rf_mutex_destroy(m) -decl_simple_lock_data(, *m) -{ - mtx_destroy(m); - return (0); -} -#else -int -rf_mutex_init(m, s) -decl_simple_lock_data(, *m) -const char *s; -{ - simple_lock_init(m); - return (0); -} - -int -rf_mutex_destroy(m) -decl_simple_lock_data(, 
*m) -{ - return (0); -} -#endif - -int -rf_cond_init(c) -RF_DECLARE_COND(*c) -{ - *c = 0; /* no reason */ - return (0); -} - -int -rf_cond_destroy(c) -RF_DECLARE_COND(*c) -{ - return (0); -} diff --git a/sys/dev/raidframe/rf_threadstuff.h b/sys/dev/raidframe/rf_threadstuff.h deleted file mode 100644 index a3560cc..0000000 --- a/sys/dev/raidframe/rf_threadstuff.h +++ /dev/null @@ -1,229 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_threadstuff.h,v 1.10 2001/01/27 20:42:21 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * threadstuff.h -- definitions for threads, locks, and synchronization - * - * The purpose of this file is provide some illusion of portability. - * If the functions below can be implemented with the same semantics on - * some new system, then at least the synchronization and thread control - * part of the code should not require modification to port to a new machine. 
- * the only other place where the pthread package is explicitly used is - * threadid.h - * - * this file should be included above stdio.h to get some necessary defines. - * - */ - -#ifndef _RF__RF_THREADSTUFF_H_ -#define _RF__RF_THREADSTUFF_H_ - -#include <dev/raidframe/rf_types.h> -#include <sys/types.h> -#include <sys/param.h> -#ifdef _KERNEL -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/kthread.h> -#endif - -#define rf_create_managed_mutex(a,b) _rf_create_managed_mutex(a,b,__FILE__,__LINE__) -#define rf_create_managed_cond(a,b) _rf_create_managed_cond(a,b,__FILE__,__LINE__) -#define rf_init_managed_threadgroup(a,b) _rf_init_managed_threadgroup(a,b,__FILE__,__LINE__) -#define rf_init_threadgroup(a) _rf_init_threadgroup(a,__FILE__,__LINE__) -#define rf_destroy_threadgroup(a) _rf_destroy_threadgroup(a,__FILE__,__LINE__) - -int _rf_init_threadgroup(RF_ThreadGroup_t * g, char *file, int line); -int _rf_destroy_threadgroup(RF_ThreadGroup_t * g, char *file, int line); -int -_rf_init_managed_threadgroup(RF_ShutdownList_t ** listp, - RF_ThreadGroup_t * g, char *file, int line); - -#include <sys/lock.h> -#if defined(__FreeBSD__ ) && __FreeBSD_version > 500005 -#include <sys/mutex.h> -#define decl_simple_lock_data(a,b) a struct mtx b; -#define simple_lock_addr(a) ((struct mtx *)&(a)) - -typedef struct thread *RF_Thread_t; -typedef void *RF_ThreadArg_t; - -#ifdef _KERNEL -static __inline struct ucred * -rf_getucred(RF_Thread_t td) -{ - return (((struct thread *)td)->td_ucred); -} -#endif - -#define RF_LOCK_MUTEX(_m_) mtx_lock(&(_m_)) -#define RF_UNLOCK_MUTEX(_m_) mtx_unlock(&(_m_)) -#else -#define decl_simple_lock_data(a,b) a struct simplelock b; -#define simple_lock_addr(a) ((struct simplelock *)&(a)) - -typedef struct proc *RF_Thread_t; -typedef void *RF_ThreadArg_t; - -static __inline struct ucred * -rf_getucred(RF_Thread_t td) -{ - return (((struct proc *)td)->p_ucred); -} - -#define RF_LOCK_MUTEX(_m_) simple_lock(&(_m_)) -#define RF_UNLOCK_MUTEX(_m_) 
simple_unlock(&(_m_)) -#endif - -#define RF_DECLARE_MUTEX(_m_) decl_simple_lock_data(,(_m_)) -#define RF_DECLARE_STATIC_MUTEX(_m_) decl_simple_lock_data(static,(_m_)) -#define RF_DECLARE_EXTERN_MUTEX(_m_) decl_simple_lock_data(extern,(_m_)) - -#define RF_DECLARE_COND(_c_) int _c_; -#define RF_DECLARE_STATIC_COND(_c_) static int _c_; -#define RF_DECLARE_EXTERN_COND(_c_) extern int _c_; - -/* - * In NetBSD, kernel threads are simply processes which share several - * substructures and never run in userspace. - */ -#define RF_WAIT_COND(_c_,_m_) \ - RF_LTSLEEP(&(_c_), PRIBIO, "rfwcond", 0, &(_m_)) -#define RF_SIGNAL_COND(_c_) wakeup_one(&(_c_)) -#define RF_BROADCAST_COND(_c_) wakeup(&(_c_)) -#if defined(__NetBSD__) -#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ - kthread_create1((void (*)(void *))(_func_), (void *)(_arg_), \ - (struct proc **)&(_handle_), _name_) -#define RF_THREAD_EXIT(ret) \ - kthread_exit(ret) -#elif defined(__FreeBSD__) -#if __FreeBSD_version > 500005 -#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ - kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \ - (struct proc **)&(_handle_), 0, 4, _name_) -#define RF_THREAD_EXIT(ret) \ - kthread_exit(ret) -#else -#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ - kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \ - (struct proc **)&(_handle_), _name_) -#define RF_THREAD_EXIT(ret) \ - kthread_exit(ret); -#endif -#endif - -struct RF_ThreadGroup_s { - int created; - int running; - int shutdown; - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) -}; -/* - * Someone has started a thread in the group - */ -#define RF_THREADGROUP_STARTED(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->created++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} - -/* - * Thread announcing that it is now running - */ -#define RF_THREADGROUP_RUNNING(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->running++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ - RF_SIGNAL_COND((_g_)->cond); \ -} - 
-/* - * Thread announcing that it is now done - */ -#define RF_THREADGROUP_DONE(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->shutdown++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ - RF_SIGNAL_COND((_g_)->cond); \ -} - -/* - * Wait for all threads to start running - */ -#define RF_THREADGROUP_WAIT_START(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - while((_g_)->running < (_g_)->created) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} - -/* - * Wait for all threads to stop running - */ -#ifndef __NetBSD__ -#define RF_THREADGROUP_WAIT_STOP(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - RF_ASSERT((_g_)->running == (_g_)->created); \ - while((_g_)->shutdown < (_g_)->running) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} -#else - /* XXX Note that we've removed the assert. That should get put back in once - * we actually get something like a kernel thread running */ -#define RF_THREADGROUP_WAIT_STOP(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - while((_g_)->shutdown < (_g_)->running) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} -#endif - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -int rf_mutex_init(struct mtx *, const char *); -int rf_mutex_destroy(struct mtx *); -int _rf_create_managed_mutex(RF_ShutdownList_t **, struct mtx *, - char *, int); -#else -int rf_mutex_init(struct simplelock *, const char *); -int rf_mutex_destroy(struct simplelock *); -int _rf_create_managed_mutex(RF_ShutdownList_t **, struct simplelock *, - char *, int); -#endif -int _rf_create_managed_cond(RF_ShutdownList_t ** listp, int *, - char *file, int line); - -int rf_cond_init(int *c); -int rf_cond_destroy(int *c); -#endif /* !_RF__RF_THREADSTUFF_H_ */ diff --git a/sys/dev/raidframe/rf_types.h b/sys/dev/raidframe/rf_types.h deleted file mode 100644 index 25630ef..0000000 --- a/sys/dev/raidframe/rf_types.h +++ /dev/null @@ -1,247 +0,0 @@ -/* $FreeBSD$ */ 
-/* $NetBSD: rf_types.h,v 1.6 1999/09/05 03:05:55 oster Exp $ */ -/* - * rf_types.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/*********************************************************** - * - * rf_types.h -- standard types for RAIDframe - * - ***********************************************************/ - -#ifndef _RF__RF_TYPES_H_ -#define _RF__RF_TYPES_H_ - - -#include <dev/raidframe/rf_archs.h> - -#include <sys/errno.h> -#include <sys/types.h> - -#include <sys/uio.h> -#include <sys/param.h> -#ifdef _KERNEL -#include <sys/lock.h> -#endif - -/* - * First, define system-dependent types and constants. - * - * If the machine is big-endian, RF_BIG_ENDIAN should be 1. - * Otherwise, it should be 0. - * - * The various integer types should be self-explanatory; we - * use these elsewhere to avoid size confusion. 
- * - * LONGSHIFT is lg(sizeof(long)) (that is, log base two of sizeof(long) - * - */ - -#include <sys/types.h> -#include <sys/limits.h> -#include <machine/endian.h> - -#if BYTE_ORDER == BIG_ENDIAN -#define RF_IS_BIG_ENDIAN 1 -#elif BYTE_ORDER == LITTLE_ENDIAN -#define RF_IS_BIG_ENDIAN 0 -#else -#error byte order not defined -#endif -typedef int8_t RF_int8; -typedef u_int8_t RF_uint8; -typedef int16_t RF_int16; -typedef u_int16_t RF_uint16; -typedef int32_t RF_int32; -typedef u_int32_t RF_uint32; -typedef int64_t RF_int64; -typedef u_int64_t RF_uint64; -#if LONG_BIT == 32 -#define RF_LONGSHIFT 2 -#elif LONG_BIT == 64 -#define RF_LONGSHIFT 3 -#elif defined(__i386__) -#define RF_LONGSHIFT 2 -#elif defined(__alpha__) -#define RF_LONGSHIFT 3 -#else -#error word size not defined -#endif - -/* - * These are just zero and non-zero. We don't use "TRUE" - * and "FALSE" because there's too much nonsense trying - * to get them defined exactly once on every platform, given - * the different places they may be defined in system header - * files. 
- */ -#define RF_TRUE 1 -#define RF_FALSE 0 - -/* - * Now, some generic types - */ -typedef RF_uint64 RF_IoCount_t; -typedef RF_uint64 RF_Offset_t; -typedef RF_uint32 RF_PSSFlags_t; -typedef RF_uint64 RF_SectorCount_t; -typedef RF_uint64 RF_StripeCount_t; -typedef RF_int64 RF_SectorNum_t;/* these are unsigned so we can set them to - * (-1) for "uninitialized" */ -typedef RF_int64 RF_StripeNum_t; -typedef RF_int64 RF_RaidAddr_t; -typedef int RF_RowCol_t; /* unsigned so it can be (-1) */ -typedef RF_int64 RF_HeadSepLimit_t; -typedef RF_int64 RF_ReconUnitCount_t; -typedef int RF_ReconUnitNum_t; - -typedef char RF_ParityConfig_t; - -typedef char RF_DiskQueueType_t[1024]; -#define RF_DISK_QUEUE_TYPE_NONE "" - -/* values for the 'type' field in a reconstruction buffer */ -typedef int RF_RbufType_t; -#define RF_RBUF_TYPE_EXCLUSIVE 0 /* this buf assigned exclusively to - * one disk */ -#define RF_RBUF_TYPE_FLOATING 1 /* this is a floating recon buf */ -#define RF_RBUF_TYPE_FORCED 2 /* this rbuf was allocated to complete - * a forced recon */ - -typedef char RF_IoType_t; -#define RF_IO_TYPE_READ 'r' -#define RF_IO_TYPE_WRITE 'w' -#define RF_IO_TYPE_NOP 'n' -#define RF_IO_IS_R_OR_W(_type_) (((_type_) == RF_IO_TYPE_READ) \ - || ((_type_) == RF_IO_TYPE_WRITE)) - -typedef void (*RF_VoidFuncPtr) (void *,...); - -typedef RF_uint32 RF_AccessStripeMapFlags_t; -typedef RF_uint32 RF_DiskQueueDataFlags_t; -typedef RF_uint32 RF_DiskQueueFlags_t; -typedef RF_uint32 RF_RaidAccessFlags_t; - -#define RF_DISKQUEUE_DATA_FLAGS_NONE ((RF_DiskQueueDataFlags_t)0) - -typedef struct RF_AccessStripeMap_s RF_AccessStripeMap_t; -typedef struct RF_AccessStripeMapHeader_s RF_AccessStripeMapHeader_t; -typedef struct RF_AllocListElem_s RF_AllocListElem_t; -typedef struct RF_CallbackDesc_s RF_CallbackDesc_t; -typedef struct RF_ChunkDesc_s RF_ChunkDesc_t; -typedef struct RF_CommonLogData_s RF_CommonLogData_t; -typedef struct RF_Config_s RF_Config_t; -typedef struct RF_CumulativeStats_s 
RF_CumulativeStats_t; -typedef struct RF_DagHeader_s RF_DagHeader_t; -typedef struct RF_DagList_s RF_DagList_t; -typedef struct RF_DagNode_s RF_DagNode_t; -typedef struct RF_DeclusteredConfigInfo_s RF_DeclusteredConfigInfo_t; -typedef struct RF_DiskId_s RF_DiskId_t; -typedef struct RF_DiskMap_s RF_DiskMap_t; -typedef struct RF_DiskQueue_s RF_DiskQueue_t; -typedef struct RF_DiskQueueData_s RF_DiskQueueData_t; -typedef struct RF_DiskQueueSW_s RF_DiskQueueSW_t; -typedef struct RF_Etimer_s RF_Etimer_t; -typedef struct RF_EventCreate_s RF_EventCreate_t; -typedef struct RF_FreeList_s RF_FreeList_t; -typedef struct RF_LockReqDesc_s RF_LockReqDesc_t; -typedef struct RF_LockTableEntry_s RF_LockTableEntry_t; -typedef struct RF_MCPair_s RF_MCPair_t; -typedef struct RF_OwnerInfo_s RF_OwnerInfo_t; -typedef struct RF_ParityLog_s RF_ParityLog_t; -typedef struct RF_ParityLogAppendQueue_s RF_ParityLogAppendQueue_t; -typedef struct RF_ParityLogData_s RF_ParityLogData_t; -typedef struct RF_ParityLogDiskQueue_s RF_ParityLogDiskQueue_t; -typedef struct RF_ParityLogQueue_s RF_ParityLogQueue_t; -typedef struct RF_ParityLogRecord_s RF_ParityLogRecord_t; -typedef struct RF_PerDiskReconCtrl_s RF_PerDiskReconCtrl_t; -typedef struct RF_PSStatusHeader_s RF_PSStatusHeader_t; -typedef struct RF_PhysDiskAddr_s RF_PhysDiskAddr_t; -typedef struct RF_PropHeader_s RF_PropHeader_t; -typedef struct RF_Raid_s RF_Raid_t; -typedef struct RF_RaidAccessDesc_s RF_RaidAccessDesc_t; -typedef struct RF_RaidDisk_s RF_RaidDisk_t; -typedef struct RF_RaidLayout_s RF_RaidLayout_t; -typedef struct RF_RaidReconDesc_s RF_RaidReconDesc_t; -typedef struct RF_ReconBuffer_s RF_ReconBuffer_t; -typedef struct RF_ReconConfig_s RF_ReconConfig_t; -typedef struct RF_ReconCtrl_s RF_ReconCtrl_t; -typedef struct RF_ReconDoneProc_s RF_ReconDoneProc_t; -typedef struct RF_ReconEvent_s RF_ReconEvent_t; -typedef struct RF_ReconMap_s RF_ReconMap_t; -typedef struct RF_ReconMapListElem_s RF_ReconMapListElem_t; -typedef struct 
RF_ReconParityStripeStatus_s RF_ReconParityStripeStatus_t; -typedef struct RF_RedFuncs_s RF_RedFuncs_t; -typedef struct RF_RegionBufferQueue_s RF_RegionBufferQueue_t; -typedef struct RF_RegionInfo_s RF_RegionInfo_t; -typedef struct RF_ShutdownList_s RF_ShutdownList_t; -typedef struct RF_SpareTableEntry_s RF_SpareTableEntry_t; -typedef struct RF_SparetWait_s RF_SparetWait_t; -typedef struct RF_StripeLockDesc_s RF_StripeLockDesc_t; -typedef struct RF_ThreadGroup_s RF_ThreadGroup_t; -typedef struct RF_ThroughputStats_s RF_ThroughputStats_t; - -/* - * Important assumptions regarding ordering of the states in this list - * have been made!!! - * Before disturbing this ordering, look at code in rf_states.c - */ -typedef enum RF_AccessState_e { - /* original states */ - rf_QuiesceState, /* handles queisence for reconstruction */ - rf_IncrAccessesCountState, /* count accesses in flight */ - rf_DecrAccessesCountState, - rf_MapState, /* map access to disk addresses */ - rf_LockState, /* take stripe locks */ - rf_CreateDAGState, /* create DAGs */ - rf_ExecuteDAGState, /* execute DAGs */ - rf_ProcessDAGState, /* DAGs are completing- check if correct, or - * if we need to retry */ - rf_CleanupState, /* release stripe locks, clean up */ - rf_LastState /* must be the last state */ -} RF_AccessState_t; -#define RF_MAXROW 10 /* these are arbitrary and can be modified at - * will */ -#define RF_MAXCOL 40 -#define RF_MAXSPARE 10 -#define RF_MAXDBGV 75 /* max number of debug variables */ - -union RF_GenericParam_u { - void *p; - RF_uint64 v; -}; -typedef union RF_GenericParam_u RF_DagParam_t; -typedef union RF_GenericParam_u RF_CBParam_t; - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -typedef struct bio *RF_Buf_t; -#else -typedef struct buf *RF_Buf_t; -#endif -#endif /* _RF__RF_TYPES_H_ */ diff --git a/sys/dev/raidframe/rf_utils.c b/sys/dev/raidframe/rf_utils.c deleted file mode 100644 index 71f7b93..0000000 --- a/sys/dev/raidframe/rf_utils.c +++ /dev/null @@ -1,149 +0,0 @@ 
-/* $NetBSD: rf_utils.c,v 1.5 2000/01/07 03:41:03 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/**************************************** - * - * rf_utils.c -- various support routines - * - ****************************************/ - - -#include <dev/raidframe/rf_threadstuff.h> - -#include <sys/time.h> - -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> - -/* creates & zeros 2-d array with b rows and k columns (MCH) */ -RF_RowCol_t ** -rf_make_2d_array(b, k, allocList) - int b; - int k; - RF_AllocListElem_t *allocList; -{ - RF_RowCol_t **retval, i; - - RF_MallocAndAdd(retval, b * sizeof(RF_RowCol_t *), (RF_RowCol_t **), allocList); - for (i = 0; i < b; i++) { - RF_MallocAndAdd(retval[i], k * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval[i], k * sizeof(RF_RowCol_t)); - } - return (retval); -} - -void -rf_free_2d_array(a, b, k) - RF_RowCol_t **a; - int b; - int k; -{ - RF_RowCol_t i; - - for (i = 0; i < b; i++) - RF_Free(a[i], k * sizeof(RF_RowCol_t)); - RF_Free(a, b * sizeof(RF_RowCol_t)); -} - - -/* creates & zeros a 1-d array with c columns */ -RF_RowCol_t * -rf_make_1d_array(c, allocList) - int c; - RF_AllocListElem_t *allocList; -{ - RF_RowCol_t *retval; - - RF_MallocAndAdd(retval, c * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval, c * sizeof(RF_RowCol_t)); - return (retval); -} - -void -rf_free_1d_array(a, n) - RF_RowCol_t *a; - int n; -{ - RF_Free(a, n * sizeof(RF_RowCol_t)); -} -/* Euclid's algorithm: finds and returns the greatest common divisor - * between a and b. (MCH) - */ -int -rf_gcd(m, n) - int m; - int n; -{ - int t; - - while (m > 0) { - t = n % m; - n = m; - m = t; - } - return (n); -} -/* these convert between text and integer. 
Apparently the regular C macros - * for doing this are not available in the kernel - */ - -#define ISDIGIT(x) ( (x) >= '0' && (x) <= '9' ) -#define ISHEXCHAR(x) ( ((x) >= 'a' && (x) <= 'f') || ((x) >= 'A' && (x) <= 'F') ) -#define ISHEX(x) ( ISDIGIT(x) || ISHEXCHAR(x) ) -#define HC2INT(x) ( ((x) >= 'a' && (x) <= 'f') ? (x) - 'a' + 10 : \ - ( ((x) >= 'A' && (x) <= 'F') ? (x) - 'A' + 10 : (x - '0') ) ) - -int -rf_atoi(p) - char *p; -{ - int val = 0, negate = 0; - - if (*p == '-') { - negate = 1; - p++; - } - for (; ISDIGIT(*p); p++) - val = 10 * val + (*p - '0'); - return ((negate) ? -val : val); -} - -int -rf_htoi(p) - char *p; -{ - int val = 0; - for (; ISHEXCHAR(*p); p++) - val = 16 * val + HC2INT(*p); - return (val); -} diff --git a/sys/dev/raidframe/rf_utils.h b/sys/dev/raidframe/rf_utils.h deleted file mode 100644 index 18eac84..0000000 --- a/sys/dev/raidframe/rf_utils.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_utils.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************** - * - * rf_utils.c -- header file for utils.c - * - ***************************************/ - - -#ifndef _RF__RF_UTILS_H_ -#define _RF__RF_UTILS_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_threadstuff.h> - -char *rf_find_non_white(char *p); -char *rf_find_white(char *p); -RF_RowCol_t **rf_make_2d_array(int b, int k, RF_AllocListElem_t * allocList); -RF_RowCol_t *rf_make_1d_array(int c, RF_AllocListElem_t * allocList); -void rf_free_2d_array(RF_RowCol_t ** a, int b, int k); -void rf_free_1d_array(RF_RowCol_t * a, int n); -int rf_gcd(int m, int n); -int rf_atoi(char *p); -int rf_htoi(char *p); - -#define RF_USEC_PER_SEC 1000000 -#define RF_TIMEVAL_TO_US(_t_) (((_t_).tv_sec) \ - * RF_USEC_PER_SEC + (_t_).tv_usec) - -#define RF_TIMEVAL_DIFF(_start_,_end_,_diff_) { \ - if ((_end_)->tv_usec < (_start_)->tv_usec) { \ - (_diff_)->tv_usec = ((_end_)->tv_usec + RF_USEC_PER_SEC) \ - - (_start_)->tv_usec; \ - (_diff_)->tv_sec = ((_end_)->tv_sec-1) - (_start_)->tv_sec; \ - } \ - else { \ - (_diff_)->tv_usec = (_end_)->tv_usec - (_start_)->tv_usec; \ - (_diff_)->tv_sec = (_end_)->tv_sec - (_start_)->tv_sec; \ - } \ -} - -#endif /* !_RF__RF_UTILS_H_ */ |