diff options
Diffstat (limited to 'sys')
138 files changed, 0 insertions, 42458 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES index f2e8ed5..cf5183c 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1038,12 +1038,6 @@ device ccd #Concatenated disk driver device vinum #Vinum concat/mirror/raid driver options VINUMDEBUG #enable Vinum debugging hooks -# RAIDframe device. RAID_AUTOCONFIG allows RAIDframe to search all of the -# disk devices in the system looking for components that it recognizes (already -# configured once before) and auto-configured them into arrays. -device raidframe -options RAID_AUTOCONFIG - # Kernel side iconv library options LIBICONV diff --git a/sys/conf/files b/sys/conf/files index 339f7db..a905aea 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -606,66 +606,6 @@ dev/puc/puc_pci.c optional puc pci dev/puc/puc_pccard.c optional puc pccard dev/puc/puc_sbus.c optional puc sbus dev/puc/pucdata.c optional puc pci -dev/raidframe/rf_acctrace.c optional raidframe -dev/raidframe/rf_alloclist.c optional raidframe -dev/raidframe/rf_aselect.c optional raidframe -dev/raidframe/rf_callback.c optional raidframe -dev/raidframe/rf_chaindecluster.c optional raidframe -dev/raidframe/rf_copyback.c optional raidframe -dev/raidframe/rf_cvscan.c optional raidframe -dev/raidframe/rf_dagdegrd.c optional raidframe -dev/raidframe/rf_dagdegwr.c optional raidframe -dev/raidframe/rf_dagffrd.c optional raidframe -dev/raidframe/rf_dagffwr.c optional raidframe -dev/raidframe/rf_dagfuncs.c optional raidframe -dev/raidframe/rf_dagutils.c optional raidframe -dev/raidframe/rf_debugMem.c optional raidframe -dev/raidframe/rf_debugprint.c optional raidframe -dev/raidframe/rf_decluster.c optional raidframe -dev/raidframe/rf_declusterPQ.c optional raidframe -dev/raidframe/rf_diskqueue.c optional raidframe -dev/raidframe/rf_disks.c optional raidframe -dev/raidframe/rf_driver.c optional raidframe -dev/raidframe/rf_engine.c optional raidframe -dev/raidframe/rf_evenodd.c optional raidframe -dev/raidframe/rf_evenodd_dagfuncs.c optional raidframe -dev/raidframe/rf_evenodd_dags.c optional raidframe -dev/raidframe/rf_fifo.c optional raidframe -dev/raidframe/rf_freebsdkintf.c optional raidframe -dev/raidframe/rf_interdecluster.c optional raidframe -dev/raidframe/rf_invertq.c optional raidframe -dev/raidframe/rf_layout.c optional raidframe -dev/raidframe/rf_map.c optional raidframe -dev/raidframe/rf_mcpair.c optional raidframe -dev/raidframe/rf_memchunk.c optional raidframe -dev/raidframe/rf_nwayxor.c optional raidframe -dev/raidframe/rf_options.c optional raidframe -dev/raidframe/rf_paritylog.c optional raidframe -dev/raidframe/rf_paritylogDiskMgr.c optional raidframe -dev/raidframe/rf_paritylogging.c optional raidframe -dev/raidframe/rf_parityloggingdags.c optional raidframe -dev/raidframe/rf_parityscan.c optional raidframe -dev/raidframe/rf_pq.c optional raidframe -dev/raidframe/rf_pqdeg.c optional raidframe -dev/raidframe/rf_pqdegdags.c optional raidframe -dev/raidframe/rf_psstatus.c optional raidframe -dev/raidframe/rf_raid0.c optional raidframe -dev/raidframe/rf_raid1.c optional raidframe -dev/raidframe/rf_raid4.c optional raidframe -dev/raidframe/rf_raid5.c optional raidframe -dev/raidframe/rf_raid5_rotatedspare.c optional raidframe -dev/raidframe/rf_reconbuffer.c optional raidframe -dev/raidframe/rf_reconmap.c optional raidframe -dev/raidframe/rf_reconstruct.c optional raidframe -dev/raidframe/rf_reconutil.c optional raidframe -dev/raidframe/rf_revent.c optional raidframe -dev/raidframe/rf_shutdown.c optional raidframe -dev/raidframe/rf_sstf.c optional raidframe -dev/raidframe/rf_states.c optional raidframe -dev/raidframe/rf_stripelocks.c optional raidframe -dev/raidframe/rf_strutils.c optional raidframe -dev/raidframe/rf_threadstuff.c optional raidframe -dev/raidframe/rf_utils.c optional raidframe dev/random/harvest.c standard dev/random/randomdev.c optional random dev/random/yarrow.c optional random diff --git a/sys/conf/options b/sys/conf/options index f163ee3..4121e62 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -567,10 +567,6 @@ PCFCLOCK_VERBOSE opt_pcfclock.h PCFCLOCK_MAX_RETRIES opt_pcfclock.h TDFX_LINUX opt_tdfx.h -# RAIDframe options -RAID_AUTOCONFIG opt_raid.h -RAID_DEBUG opt_raid.h - KTR opt_global.h KTR_ALQ opt_ktr.h KTR_MASK opt_ktr.h diff --git a/sys/dev/raidframe/rf_acctrace.c b/sys/dev/raidframe/rf_acctrace.c deleted file mode 100644 index 91c1b6d4..0000000 --- a/sys/dev/raidframe/rf_acctrace.c +++ /dev/null @@ -1,174 +0,0 @@ -/* $NetBSD: rf_acctrace.c,v 1.4 1999/08/13 03:41:52 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * acctrace.c -- code to support collecting information about each access - * - *****************************************************************************/ - -#if defined(__FreeBSD__) -#include <sys/types.h> -#include <sys/time.h> -#endif -#include <sys/stat.h> -#if defined(__NetBSD__) -#include <sys/types.h> -#endif - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_hist.h> -#include <dev/raidframe/rf_shutdown.h> - -static long numTracesSoFar; -static int accessTraceBufCount = 0; -static RF_AccTraceEntry_t *access_tracebuf; -static long traceCount; - -int rf_stopCollectingTraces; -RF_DECLARE_MUTEX(rf_tracing_mutex) - int rf_trace_fd; - - static void rf_ShutdownAccessTrace(void *); - - static void rf_ShutdownAccessTrace(ignored) - void *ignored; -{ - if (rf_accessTraceBufSize) { - if (accessTraceBufCount) - rf_FlushAccessTraceBuf(); - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); - } - rf_mutex_destroy(&rf_tracing_mutex); -} - -int -rf_ConfigureAccessTrace(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0; - if (rf_accessTraceBufSize) { - RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - accessTraceBufCount = 0; - } - traceCount = 0; - numTracesSoFar = 0; - rc = rf_mutex_init(&rf_tracing_mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - if (rf_accessTraceBufSize) { - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); - rf_mutex_destroy(&rf_tracing_mutex); - } - } - return (rc); -} -/* install a trace record. cause a flush to disk or to the trace collector daemon - * if the trace buffer is at least 1/2 full. - */ -void -rf_LogTraceRec(raid, rec) - RF_Raid_t *raid; - RF_AccTraceEntry_t *rec; -{ - RF_AccTotals_t *acc = &raid->acc_totals; -#if 0 - RF_Etimer_t timer; - int i, n; -#endif - - if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces))) - return; - - /* update AccTotals for this device */ - if (!raid->keep_acc_totals) - return; - acc->num_log_ents++; - if (rec->reconacc) { - acc->recon_start_to_fetch_us += rec->specific.recon.recon_start_to_fetch_us; - acc->recon_fetch_to_return_us += rec->specific.recon.recon_fetch_to_return_us; - acc->recon_return_to_submit_us += rec->specific.recon.recon_return_to_submit_us; - acc->recon_num_phys_ios += rec->num_phys_ios; - acc->recon_phys_io_us += rec->phys_io_us; - acc->recon_diskwait_us += rec->diskwait_us; - acc->recon_reccount++; - } else { - RF_HIST_ADD(acc->tot_hist, rec->total_us); - RF_HIST_ADD(acc->dw_hist, rec->diskwait_us); - /* count of physical ios which are too big. often due to - * thermal recalibration */ - /* if bigvals > 0, you should probably ignore this data set */ - if (rec->diskwait_us > 100000) - acc->bigvals++; - acc->total_us += rec->total_us; - acc->suspend_ovhd_us += rec->specific.user.suspend_ovhd_us; - acc->map_us += rec->specific.user.map_us; - acc->lock_us += rec->specific.user.lock_us; - acc->dag_create_us += rec->specific.user.dag_create_us; - acc->dag_retry_us += rec->specific.user.dag_retry_us; - acc->exec_us += rec->specific.user.exec_us; - acc->cleanup_us += rec->specific.user.cleanup_us; - acc->exec_engine_us += rec->specific.user.exec_engine_us; - acc->xor_us += rec->xor_us; - acc->q_us += rec->q_us; - acc->plog_us += rec->plog_us; - acc->diskqueue_us += rec->diskqueue_us; - acc->diskwait_us += rec->diskwait_us; - acc->num_phys_ios += rec->num_phys_ios; - acc->phys_io_us = rec->phys_io_us; - acc->user_reccount++; - } -} - - -/* assumes the tracing mutex is locked at entry. In order to allow this to be called - * from interrupt context, we don't do any copyouts here, but rather just wake trace - * buffer collector thread. - */ -void -rf_FlushAccessTraceBuf() -{ - accessTraceBufCount = 0; -} diff --git a/sys/dev/raidframe/rf_acctrace.h b/sys/dev/raidframe/rf_acctrace.h deleted file mode 100644 index c211514..0000000 --- a/sys/dev/raidframe/rf_acctrace.h +++ /dev/null @@ -1,134 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_acctrace.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * acctrace.h -- header file for acctrace.c - * - *****************************************************************************/ - - -#ifndef _RF__RF_ACCTRACE_H_ -#define _RF__RF_ACCTRACE_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_hist.h> -#include <dev/raidframe/rf_etimer.h> - -typedef struct RF_user_acc_stats_s { - RF_uint64 suspend_ovhd_us; /* us spent mucking in the - * access-suspension code */ - RF_uint64 map_us; /* us spent mapping the access */ - RF_uint64 lock_us; /* us spent locking & unlocking stripes, - * including time spent blocked */ - RF_uint64 dag_create_us;/* us spent creating the DAGs */ - RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not - * broken down into components */ - RF_uint64 exec_us; /* us spent in DispatchDAG */ - RF_uint64 exec_engine_us; /* us spent in engine, not including - * blocking time */ - RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and - * generally cleaning up */ -} RF_user_acc_stats_t; - -typedef struct RF_recon_acc_stats_s { - RF_uint32 recon_start_to_fetch_us; - RF_uint32 recon_fetch_to_return_us; - RF_uint32 recon_return_to_submit_us; -} RF_recon_acc_stats_t; - -typedef struct RF_acctrace_entry_s { - union { - RF_user_acc_stats_t user; - RF_recon_acc_stats_t recon; - } specific; - RF_uint8 reconacc; /* whether this is a tracerec for a user acc - * or a recon acc */ - RF_uint64 xor_us; /* us spent doing XORs */ - RF_uint64 q_us; /* us spent doing XORs */ - RF_uint64 plog_us; /* us spent waiting to stuff parity into log */ - RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl - * concurrent ops */ - RF_uint64 diskwait_us; /* _total_ us spent waiting actually waiting - * on the disk, incl concurrent ops */ - RF_uint64 total_us; /* total us spent on this access */ - RF_uint64 num_phys_ios; /* number of physical I/Os invoked */ - RF_uint64 phys_io_us; /* time of physical I/O */ - RF_Etimer_t tot_timer; /* a timer used to compute total access time */ - RF_Etimer_t timer; /* a generic timer val for timing events that - * live across procedure boundaries */ - RF_Etimer_t recon_timer;/* generic timer for recon stuff */ - RF_uint64 index; -} RF_AccTraceEntry_t; - -typedef struct RF_AccTotals_s { - /* user acc stats */ - RF_uint64 suspend_ovhd_us; - RF_uint64 map_us; - RF_uint64 lock_us; - RF_uint64 dag_create_us; - RF_uint64 dag_retry_us; - RF_uint64 exec_us; - RF_uint64 exec_engine_us; - RF_uint64 cleanup_us; - RF_uint64 user_reccount; - /* recon acc stats */ - RF_uint64 recon_start_to_fetch_us; - RF_uint64 recon_fetch_to_return_us; - RF_uint64 recon_return_to_submit_us; - RF_uint64 recon_io_overflow_count; - RF_uint64 recon_phys_io_us; - RF_uint64 recon_num_phys_ios; - RF_uint64 recon_diskwait_us; - RF_uint64 recon_reccount; - /* trace entry stats */ - RF_uint64 xor_us; - RF_uint64 q_us; - RF_uint64 plog_us; - RF_uint64 diskqueue_us; - RF_uint64 diskwait_us; - RF_uint64 total_us; - RF_uint64 num_log_ents; - RF_uint64 phys_io_overflow_count; - RF_uint64 num_phys_ios; - RF_uint64 phys_io_us; - RF_uint64 bigvals; - /* histograms */ - RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS]; - RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS]; -} RF_AccTotals_t; -#if RF_UTILITY == 0 -RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex) -#endif /* RF_UTILITY == 0 */ - - int rf_ConfigureAccessTrace(RF_ShutdownList_t ** listp); - void rf_LogTraceRec(RF_Raid_t * raid, RF_AccTraceEntry_t * rec); - void rf_FlushAccessTraceBuf(void); - -#endif /* !_RF__RF_ACCTRACE_H_ */ diff --git a/sys/dev/raidframe/rf_alloclist.c b/sys/dev/raidframe/rf_alloclist.c deleted file mode 100644 index 8c8b837..0000000 --- a/sys/dev/raidframe/rf_alloclist.c +++ /dev/null @@ -1,190 +0,0 @@ -/* $NetBSD: rf_alloclist.c,v 1.4 1999/08/13 03:41:53 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * - * Alloclist.c -- code to manipulate allocation lists - * - * an allocation list is just a list of AllocListElem structures. Each - * such structure contains a fixed-size array of pointers. Calling - * FreeAList() causes each pointer to be freed. - * - ***************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_shutdown.h> - -RF_DECLARE_STATIC_MUTEX(alist_mutex) - static unsigned int fl_hit_count, fl_miss_count; - - static RF_AllocListElem_t *al_free_list = NULL; - static int al_free_list_count; - -#define RF_AL_FREELIST_MAX 256 - -#define DO_FREE(_p,_sz) RF_Free((_p),(_sz)) - - static void rf_ShutdownAllocList(void *); - - static void rf_ShutdownAllocList(ignored) - void *ignored; -{ - RF_AllocListElem_t *p, *pt; - - for (p = al_free_list; p;) { - pt = p; - p = p->next; - DO_FREE(pt, sizeof(*pt)); - } - rf_mutex_destroy(&alist_mutex); - /* - printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n", - fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count), - fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count)); - */ -} - -int -rf_ConfigureAllocList(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - rc = rf_mutex_init(&alist_mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - al_free_list = NULL; - fl_hit_count = fl_miss_count = al_free_list_count = 0; - rc = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_mutex_destroy(&alist_mutex); - return (rc); - } - return (0); -} - - -/* we expect the lists to have at most one or two elements, so we're willing - * to search for the end. If you ever observe the lists growing longer, - * increase POINTERS_PER_ALLOC_LIST_ELEMENT. - */ -void -rf_real_AddToAllocList(l, p, size, lockflag) - RF_AllocListElem_t *l; - void *p; - int size; - int lockflag; -{ - RF_AllocListElem_t *newelem; - - for (; l->next; l = l->next) - RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* find end of list */ - - RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); - if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) { - newelem = rf_real_MakeAllocList(lockflag); - l->next = newelem; - l = newelem; - } - l->pointers[l->numPointers] = p; - l->sizes[l->numPointers] = size; - l->numPointers++; - -} - - -/* we use the debug_mem_mutex here because we need to lock it anyway to call free. - * this is probably a bug somewhere else in the code, but when I call malloc/free - * outside of any lock I have endless trouble with malloc appearing to return the - * same pointer twice. Since we have to lock it anyway, we might as well use it - * as the lock around the al_free_list. Note that we can't call Free with the - * debug_mem_mutex locked. - */ -void -rf_FreeAllocList(l) - RF_AllocListElem_t *l; -{ - int i; - RF_AllocListElem_t *temp, *p; - - for (p = l; p; p = p->next) { - RF_ASSERT(p->numPointers >= 0 && p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); - for (i = 0; i < p->numPointers; i++) { - RF_ASSERT(p->pointers[i]); - RF_Free(p->pointers[i], p->sizes[i]); - } - } - while (l) { - temp = l; - l = l->next; - if (al_free_list_count > RF_AL_FREELIST_MAX) { - DO_FREE(temp, sizeof(*temp)); - } else { - temp->next = al_free_list; - al_free_list = temp; - al_free_list_count++; - } - } -} - -RF_AllocListElem_t * -rf_real_MakeAllocList(lockflag) - int lockflag; -{ - RF_AllocListElem_t *p; - - if (al_free_list) { - fl_hit_count++; - p = al_free_list; - al_free_list = p->next; - al_free_list_count--; - } else { - fl_miss_count++; - RF_Malloc(p, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *)); /* no allocation locking - * in kernel, so this is - * fine */ - } - if (p == NULL) { - return (NULL); - } - bzero((char *) p, sizeof(RF_AllocListElem_t)); - return (p); -} diff --git a/sys/dev/raidframe/rf_alloclist.h b/sys/dev/raidframe/rf_alloclist.h deleted file mode 100644 index c746452..0000000 --- a/sys/dev/raidframe/rf_alloclist.h +++ /dev/null @@ -1,60 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_alloclist.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * - * alloclist.h -- header file for alloclist.c - * - ***************************************************************************/ - -#ifndef _RF__RF_ALLOCLIST_H_ -#define _RF__RF_ALLOCLIST_H_ - -#include <dev/raidframe/rf_types.h> - -#define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20 - -struct RF_AllocListElem_s { - void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int numPointers; - RF_AllocListElem_t *next; -}; -#define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1); -#define rf_AddToAllocList(_l_,_ptr_,_sz_) rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1) - -int rf_ConfigureAllocList(RF_ShutdownList_t ** listp); - -#if RF_UTILITY == 0 -void rf_real_AddToAllocList(RF_AllocListElem_t * l, void *p, int size, int lockflag); -void rf_FreeAllocList(RF_AllocListElem_t * l); -RF_AllocListElem_t *rf_real_MakeAllocList(int lockflag); -#endif /* RF_UTILITY == 0 */ - -#endif /* !_RF__RF_ALLOCLIST_H_ */ diff --git a/sys/dev/raidframe/rf_archs.h b/sys/dev/raidframe/rf_archs.h deleted file mode 100644 index faef157..0000000 --- a/sys/dev/raidframe/rf_archs.h +++ /dev/null @@ -1,75 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_archs.h,v 1.11 2001/01/26 04:43:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_archs.h -- defines for which architectures you want to - * include is some particular build of raidframe. Unfortunately, - * it's difficult to exclude declustering, P+Q, and distributed - * sparing because the code is intermixed with RAID5 code. This - * should be fixed. - * - * this is really intended only for use in the kernel, where I - * am worried about the size of the object module. At user level and - * in the simulator, I don't really care that much, so all the - * architectures can be compiled together. Note that by itself, turning - * off these defines does not affect the size of the executable; you - * have to edit the makefile for that. - * - * comment out any line below to eliminate that architecture. - * the list below includes all the modules that can be compiled - * out. - * - */ - -#ifndef _RF__RF_ARCHS_H_ -#define _RF__RF_ARCHS_H_ - -#define RF_INCLUDE_EVENODD 1 - -#define RF_INCLUDE_RAID5_RS 1 -#define RF_INCLUDE_PARITYLOGGING 1 - -#define RF_INCLUDE_CHAINDECLUSTER 1 -#define RF_INCLUDE_INTERDECLUSTER 1 - -#define RF_INCLUDE_PARITY_DECLUSTERING 1 -#define RF_INCLUDE_PARITY_DECLUSTERING_DS 1 - -#define RF_INCLUDE_RAID0 1 -#define RF_INCLUDE_RAID1 1 -#define RF_INCLUDE_RAID4 1 -#define RF_INCLUDE_RAID5 1 -#define RF_INCLUDE_RAID6 0 -#define RF_INCLUDE_DECL_PQ 0 - -#define RF_MEMORY_REDZONES 0 -#define RF_RECON_STATS 1 - -#include <dev/raidframe/rf_options.h> - -#endif /* !_RF__RF_ARCHS_H_ */ diff --git a/sys/dev/raidframe/rf_aselect.c b/sys/dev/raidframe/rf_aselect.c deleted file mode 100644 index 13cdbbe..0000000 --- a/sys/dev/raidframe/rf_aselect.c +++ /dev/null @@ -1,496 +0,0 @@ -/* $NetBSD: rf_aselect.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * aselect.c -- algorithm selection code - * - *****************************************************************************/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_map.h> - -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) -/* the function below is not used... so don't define it! */ -#else -static void TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *); -#endif - -static int InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int); -static void UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *); -int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t); - - -/****************************************************************************** - * - * Create and Initialiaze a dag header and termination node - * - *****************************************************************************/ -static int -InitHdrNode(hdr, raidPtr, memChunkEnable) - RF_DagHeader_t **hdr; - RF_Raid_t *raidPtr; - int memChunkEnable; -{ - /* create and initialize dag hdr */ - *hdr = rf_AllocDAGHeader(); - rf_MakeAllocList((*hdr)->allocList); - if ((*hdr)->allocList == NULL) { - rf_FreeDAGHeader(*hdr); - return (ENOMEM); - } - (*hdr)->status = rf_enable; - (*hdr)->numSuccedents = 0; - (*hdr)->raidPtr = raidPtr; - (*hdr)->next = NULL; - return (0); -} -/****************************************************************************** - * - * Transfer allocation list and mem chunks from one dag to another - * - *****************************************************************************/ -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) -/* the function below is not used... so don't define it! */ -#else -static void -TransferDagMemory(daga, dagb) - RF_DagHeader_t *daga; - RF_DagHeader_t *dagb; -{ - RF_AccessStripeMapHeader_t *end; - RF_AllocListElem_t *p; - int i, memChunksXfrd = 0, xtraChunksXfrd = 0; - - /* transfer allocList from dagb to daga */ - for (p = dagb->allocList; p; p = p->next) { - for (i = 0; i < p->numPointers; i++) { - rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]); - p->pointers[i] = NULL; - p->sizes[i] = 0; - } - p->numPointers = 0; - } - - /* transfer chunks from dagb to daga */ - while ((memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) && (daga->chunkIndex < RF_MAXCHUNKS)) { - /* stuff chunks into daga's memChunk array */ - if (memChunksXfrd < dagb->chunkIndex) { - daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd]; - dagb->memChunk[memChunksXfrd++] = NULL; - } else { - daga->memChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; - dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; - } - } - /* use escape hatch to hold excess chunks */ - while (memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) { - if (memChunksXfrd < dagb->chunkIndex) { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd]; - dagb->memChunk[memChunksXfrd++] = NULL; - } else { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; - dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; - } - } - RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex)); - RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS); - RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt); - dagb->chunkIndex = 0; - dagb->xtraChunkIndex = 0; - - /* transfer asmList from dagb to daga */ - if (dagb->asmList) { - if (daga->asmList) { - end = daga->asmList; - while (end->next) - end = end->next; - end->next = dagb->asmList; - } else - daga->asmList = dagb->asmList; - dagb->asmList = NULL; - } -} -#endif /* __NetBSD__ */ - -/***************************************************************************************** - * - * Ensure that all node->dagHdr fields in a dag are consistent - * - * IMPORTANT: This routine recursively searches all succedents of the node. If a - * succedent is encountered whose dagHdr ptr does not require adjusting, that node's - * succedents WILL NOT BE EXAMINED. - * - ****************************************************************************************/ -static void -UpdateNodeHdrPtr(hdr, node) - RF_DagHeader_t *hdr; - RF_DagNode_t *node; -{ - int i; - RF_ASSERT(hdr != NULL && node != NULL); - for (i = 0; i < node->numSuccedents; i++) - if (node->succedents[i]->dagHdr != hdr) - UpdateNodeHdrPtr(hdr, node->succedents[i]); - node->dagHdr = hdr; -} -/****************************************************************************** - * - * Create a DAG to do a read or write operation. - * - * create an array of dagLists, one list per parity stripe. - * return the lists in the array desc->dagArray. - * - * Normally, each list contains one dag for the entire stripe. In some - * tricky cases, we break this into multiple dags, either one per stripe - * unit or one per block (sector). When this occurs, these dags are returned - * as a linked list (dagList) which is executed sequentially (to preserve - * atomic parity updates in the stripe). - * - * dags which operate on independent parity goups (stripes) are returned in - * independent dagLists (distinct elements in desc->dagArray) and may be - * executed concurrently. - * - * Finally, if the SelectionFunc fails to create a dag for a block, we punt - * and return 1. - * - * The above process is performed in two phases: - * 1) create an array(s) of creation functions (eg stripeFuncs) - * 2) create dags and concatenate/merge to form the final dag. - * - * Because dag's are basic blocks (single entry, single exit, unconditional - * control flow, we can add the following optimizations (future work): - * first-pass optimizer to allow max concurrency (need all data dependencies) - * second-pass optimizer to eliminate common subexpressions (need true - * data dependencies) - * third-pass optimizer to eliminate dead code (need true data dependencies) - *****************************************************************************/ - -#define MAXNSTRIPES 5 - -int -rf_SelectAlgorithm(desc, flags) - RF_RaidAccessDesc_t *desc; - RF_RaidAccessFlags_t flags; -{ - RF_AccessStripeMapHeader_t *asm_h = desc->asmap; - RF_IoType_t type = desc->type; - RF_Raid_t *raidPtr = desc->raidPtr; - void *bp = desc->bp; - - RF_AccessStripeMap_t *asmap = asm_h->stripeMap; - RF_AccessStripeMap_t *asm_p; - RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h; - int i, j, k; - RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES]; - RF_AccessStripeMap_t *asm_up, *asm_bp; - RF_AccessStripeMapHeader_t ***asmh_u, *endASMList; - RF_AccessStripeMapHeader_t ***asmh_b; - RF_VoidFuncPtr **stripeUnitFuncs, uFunc; - RF_VoidFuncPtr **blockFuncs, bFunc; - int numStripesBailed = 0, cantCreateDAGs = RF_FALSE; - int numStripeUnitsBailed = 0; - int stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0; - RF_StripeNum_t numStripeUnits; - RF_SectorNum_t numBlocks; - RF_RaidAddr_t address; - int length; - RF_PhysDiskAddr_t *physPtr; - caddr_t buffer; - - lastdag_h = NULL; - asmh_u = asmh_b = NULL; - stripeUnitFuncs = NULL; - blockFuncs = NULL; - - /* get an array of dag-function creation pointers, try to avoid - * calling malloc */ - if (asm_h->numStripes <= MAXNSTRIPES) - stripeFuncs = normalStripeFuncs; - else - RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - - /* walk through the asm list once collecting information */ - /* attempt to find a single creation function for each stripe */ - desc->numStripes = 0; - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { - desc->numStripes++; - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_p, &stripeFuncs[i]); - /* check to see if we found a creation func for this stripe */ - if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { - /* could not find creation function for entire stripe - * so, let's see if we can find one for each stripe - * unit in the stripe */ - - if (numStripesBailed == 0) { - /* one stripe map header for each stripe we - * bail on */ - RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to arrays of - * stripeFuncs */ - RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); - } - /* create an array of creation funcs (called - * stripeFuncs) for this stripe */ - numStripeUnits = asm_p->numStripeUnitsAccessed; - RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of stripeUnitFuncs for this stripe */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - /* remap for series of single stripe-unit - * accesses */ - address = physPtr->raidAddress; - length = physPtr->numSector; - buffer = physPtr->bufPtr; - - asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); - asm_up = asmh_u[numStripesBailed][j]->stripeMap; - - /* get the creation func for this stripe unit */ - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j])); - - /* check to see if we found a creation func - * for this stripe unit */ - if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL) { - /* could not find creation function - * for stripe unit so, let's see if we - * can find one for each block in the - * stripe unit */ - if (numStripeUnitsBailed == 0) { - /* one stripe map header for - * each stripe unit we bail on */ - RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to - * arrays of blockFuncs */ - RF_Calloc(blockFuncs, asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); - } - /* create an array of creation funcs - * (called blockFuncs) for this stripe - * unit */ - numBlocks = physPtr->numSector; - numBlockDags += numBlocks; - RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of blockFuncs for this - * stripe unit */ - for (k = 0; k < numBlocks; k++) { - /* remap for series of single - * stripe-unit accesses */ - address = physPtr->raidAddress + k; - length = 1; - buffer = physPtr->bufPtr + (k * (1 << raidPtr->logBytesPerSector)); - - asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); - asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap; - - /* get the creation func for - * this stripe unit */ - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k])); - - /* check to see if we found a - * creation func for this - * stripe unit */ - if (blockFuncs[numStripeUnitsBailed][k] == NULL) - cantCreateDAGs = RF_TRUE; - } - numStripeUnitsBailed++; - } else { - numUnitDags++; - } - } - RF_ASSERT(j == numStripeUnits); - numStripesBailed++; - } - } - - if (cantCreateDAGs) { - /* free memory and punt */ - if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - if (numStripesBailed > 0) { - stripeNum = 0; - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) - if (stripeFuncs[i] == NULL) { - numStripeUnits = asm_p->numStripeUnitsAccessed; - for (j = 0; j < numStripeUnits; j++) - rf_FreeAccessStripeMap(asmh_u[stripeNum][j]); - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); - stripeNum++; - } - RF_ASSERT(stripeNum == numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - } - return (1); - } else { - /* begin dag creation */ - stripeNum = 0; - stripeUnitNum = 0; - - /* create an array of dagLists and fill them in */ - RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList); - - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { - /* grab dag header for this stripe */ - dag_h = NULL; - desc->dagArray[i].desc = desc; - - if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { - /* use bailout functions for this stripe */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - uFunc = stripeUnitFuncs[stripeNum][j]; - if (uFunc == (RF_VoidFuncPtr) NULL) { - /* use bailout functions for - * this stripe unit */ - for (k = 0; k < physPtr->numSector; k++) { - /* create a dag for - * this block */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } else { - lastdag_h->next = tempdag_h; - } - lastdag_h = tempdag_h; - - bFunc = blockFuncs[stripeUnitNum][k]; - RF_ASSERT(bFunc); - asm_bp = asmh_b[stripeUnitNum][k]->stripeMap; - (*bFunc) (raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList); - } - stripeUnitNum++; - } else { - /* create a dag for this unit */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } else { - lastdag_h->next = tempdag_h; - } - lastdag_h = tempdag_h; - - asm_up = asmh_u[stripeNum][j]->stripeMap; - (*uFunc) (raidPtr, asm_up, tempdag_h, bp, flags, tempdag_h->allocList); - } - } - RF_ASSERT(j == asm_p->numStripeUnitsAccessed); - /* merge linked bailout dag to existing dag - * collection */ - stripeNum++; - } else { - /* Create a dag for this parity stripe */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } else { - lastdag_h->next = tempdag_h; - } - lastdag_h = tempdag_h; - - (stripeFuncs[i]) (raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList); - } - desc->dagArray[i].dags = dag_h; - } - RF_ASSERT(i == desc->numStripes); - - /* free memory */ - if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0)) { - stripeNum = 0; - stripeUnitNum = 0; - if (dag_h->asmList) { - endASMList = dag_h->asmList; - while (endASMList->next) - endASMList = endASMList->next; - } else - endASMList = NULL; - /* walk through io, stripe by stripe */ - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) - if (stripeFuncs[i] == NULL) { - numStripeUnits = asm_p->numStripeUnitsAccessed; - /* walk through stripe, stripe unit by - * stripe unit */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - if (stripeUnitFuncs[stripeNum][j] == NULL) { - numBlocks = physPtr->numSector; - /* walk through stripe - * unit, block by - * block */ - for (k = 0; k < numBlocks; k++) - if (dag_h->asmList == NULL) { - dag_h->asmList = asmh_b[stripeUnitNum][k]; - endASMList = dag_h->asmList; - } else { - endASMList->next = asmh_b[stripeUnitNum][k]; - endASMList = endASMList->next; - } - RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr)); - stripeUnitNum++; - } - if (dag_h->asmList == NULL) { - dag_h->asmList = asmh_u[stripeNum][j]; - endASMList = dag_h->asmList; - } else { - endASMList->next = asmh_u[stripeNum][j]; - endASMList = endASMList->next; - } - } - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); - stripeNum++; - } - RF_ASSERT(stripeNum == numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - if (numStripeUnitsBailed > 0) { - RF_ASSERT(stripeUnitNum == numStripeUnitsBailed); - RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - } - } - return (0); - } -} diff --git a/sys/dev/raidframe/rf_aselect.h b/sys/dev/raidframe/rf_aselect.h deleted file mode 100644 index de9cd76..0000000 --- a/sys/dev/raidframe/rf_aselect.h +++ /dev/null @@ -1,43 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_aselect.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * aselect.h -- header file for algorithm selection code - * - *****************************************************************************/ - -#ifndef _RF__RF_ASELECT_H_ -#define _RF__RF_ASELECT_H_ - -#include <dev/raidframe/rf_desc.h> - -int rf_SelectAlgorithm(RF_RaidAccessDesc_t * desc, RF_RaidAccessFlags_t flags); - -#endif /* !_RF__RF_ASELECT_H_ */ diff --git a/sys/dev/raidframe/rf_bsd.h b/sys/dev/raidframe/rf_bsd.h deleted file mode 100644 index 14c10f5..0000000 --- a/sys/dev/raidframe/rf_bsd.h +++ /dev/null @@ -1,152 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_netbsd.h,v 1.12 2000/05/28 22:53:49 oster Exp $ */ - -/*- - * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster; Jason R. Thorpe. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RF__RF_BSD_H_ -#define _RF__RF_BSD_H_ - -#ifdef _KERNEL -#include <sys/fcntl.h> -#include <sys/systm.h> -#include <sys/namei.h> -#include <sys/vnode.h> -#include "opt_raid.h" - -#ifdef RAID_DEBUG -#define rf_printf(lvl, fmt, args...) \ - do { \ - if (lvl <= RAID_DEBUG) printf(fmt, ##args); \ - } while(0) - -#else /* DEBUG */ -#define rf_printf(lvl, fmt, args...) { } -#endif /* DEBUG */ -#endif /* _KERNEL */ - -/* The per-component label information that the user can set */ -typedef struct RF_ComponentInfo_s { - int row; /* the row number of this component */ - int column; /* the column number of this component */ - int serial_number; /* a user-specified serial number for this - RAID set */ -} RF_ComponentInfo_t; - -/* The per-component label information */ -typedef struct RF_ComponentLabel_s { - int version; /* The version of this label. */ - int serial_number; /* a user-specified serial number for this - RAID set */ - int mod_counter; /* modification counter. Changed (usually - by incrementing) every time the label - is changed */ - int row; /* the row number of this component */ - int column; /* the column number of this component */ - int num_rows; /* number of rows in this RAID set */ - int num_columns; /* number of columns in this RAID set */ - int clean; /* 1 when clean, 0 when dirty */ - int status; /* rf_ds_optimal, rf_ds_dist_spared, whatever. */ - /* stuff that will be in version 2 of the label */ - int sectPerSU; /* Sectors per Stripe Unit */ - int SUsPerPU; /* Stripe Units per Parity Units */ - int SUsPerRU; /* Stripe Units per Reconstruction Units */ - int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */ - int maxOutstanding; /* maxOutstanding disk requests */ - int blockSize; /* size of component block. - (disklabel->d_secsize) */ - int numBlocks; /* number of blocks on this component. May - be smaller than the partition size. */ - int partitionSize; /* number of blocks on this *partition*. - Must exactly match the partition size - from the disklabel. */ - int future_use[33]; /* Future expansion */ - int autoconfigure; /* automatically configure this RAID set. - 0 == no, 1 == yes */ - int root_partition; /* Use this set as / - 0 == no, 1 == yes*/ - int last_unit; /* last unit number (e.g. 0 for /dev/raid0) - of this component. Used for autoconfigure - only. */ - int config_order; /* 0 .. n. The order in which the component - should be auto-configured. E.g. 0 is will - done first, (and would become raid0). - This may be in conflict with last_unit!!?! */ - /* Not currently used. */ - int future_use2[44]; /* More future expansion */ -} RF_ComponentLabel_t; - -typedef struct RF_SingleComponent_s { - int row; - int column; - char component_name[50]; /* name of the component */ -} RF_SingleComponent_t; - -#ifdef _KERNEL - -struct raidcinfo { - struct vnode *ci_vp; /* component device's vnode */ - dev_t ci_dev; /* component device's dev_t */ - RF_ComponentLabel_t ci_label; /* components RAIDframe label */ -#if 0 - size_t ci_size; /* size */ - char *ci_path; /* path to component */ - size_t ci_pathlen; /* length of component path */ -#endif -}; - - - -/* XXX probably belongs in a different .h file. */ -typedef struct RF_AutoConfig_s { - char devname[56]; /* the name of this component */ - int flag; /* a general-purpose flag */ - dev_t dev; /* the device for this component */ - struct vnode *vp; /* Mr. Vnode Pointer */ - RF_ComponentLabel_t *clabel; /* the label */ - struct RF_AutoConfig_s *next; /* the next autoconfig structure - in this set. */ -} RF_AutoConfig_t; - -typedef struct RF_ConfigSet_s { - struct RF_AutoConfig_s *ac; /* all of the autoconfig structures for - this config set. */ - int rootable; /* Set to 1 if this set can be root */ - struct RF_ConfigSet_s *next; -} RF_ConfigSet_t; - -#endif /* _KERNEL */ -#endif /* _RF__RF_BSD_H_ */ diff --git a/sys/dev/raidframe/rf_callback.c b/sys/dev/raidframe/rf_callback.c deleted file mode 100644 index 1739fc6..0000000 --- a/sys/dev/raidframe/rf_callback.c +++ /dev/null @@ -1,96 +0,0 @@ -/* $NetBSD: rf_callback.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * callback.c -- code to manipulate callback descriptor - * - ****************************************************************************************/ - - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_callback.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_shutdown.h> - -static RF_FreeList_t *rf_callback_freelist; - -#define RF_MAX_FREE_CALLBACK 64 -#define RF_CALLBACK_INC 4 -#define RF_CALLBACK_INITIAL 4 - -static void rf_ShutdownCallback(void *); -static void -rf_ShutdownCallback(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_callback_freelist, next, (RF_CallbackDesc_t *)); -} - -int -rf_ConfigureCallback(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_callback_freelist, RF_MAX_FREE_CALLBACK, - RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t)); - if (rf_callback_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownCallback, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownCallback(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_callback_freelist, RF_CALLBACK_INITIAL, next, - (RF_CallbackDesc_t *)); - return (0); -} - -RF_CallbackDesc_t * -rf_AllocCallbackDesc() -{ - RF_CallbackDesc_t *p; - - RF_FREELIST_GET(rf_callback_freelist, p, next, (RF_CallbackDesc_t *)); - return (p); -} - -void -rf_FreeCallbackDesc(p) - RF_CallbackDesc_t *p; -{ - RF_FREELIST_FREE(rf_callback_freelist, p, next); -} diff --git a/sys/dev/raidframe/rf_callback.h b/sys/dev/raidframe/rf_callback.h deleted file mode 100644 index feda31d..0000000 --- a/sys/dev/raidframe/rf_callback.h +++ /dev/null @@ -1,65 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_callback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * callback.h -- header file for callback.c - * - * the reconstruction code must manage concurrent I/Os on multiple drives. - * it sometimes needs to suspend operation on a particular drive until some - * condition occurs. we can't block the thread, of course, or we wouldn't - * be able to manage our other outstanding I/Os. Instead we just suspend - * new activity on the indicated disk, and create a callback descriptor and - * put it someplace where it will get invoked when the condition that's - * stalling us has cleared. When the descriptor is invoked, it will call - * a function that will restart operation on the indicated disk. - * - ****************************************************************************************/ - -#ifndef _RF__RF_CALLBACK_H_ -#define _RF__RF_CALLBACK_H_ - -#include <dev/raidframe/rf_types.h> - -struct RF_CallbackDesc_s { - void (*callbackFunc) (RF_CBParam_t); /* function to call */ - RF_CBParam_t callbackArg; /* args to give to function, or just - * info about this callback */ - RF_CBParam_t callbackArg2; - RF_RowCol_t row; /* disk row and column IDs to give to the - * callback func */ - RF_RowCol_t col; - RF_CallbackDesc_t *next;/* next entry in list */ -}; - -int rf_ConfigureCallback(RF_ShutdownList_t ** listp); -RF_CallbackDesc_t *rf_AllocCallbackDesc(void); -void rf_FreeCallbackDesc(RF_CallbackDesc_t * p); - -#endif /* !_RF__RF_CALLBACK_H_ */ diff --git a/sys/dev/raidframe/rf_chaindecluster.c b/sys/dev/raidframe/rf_chaindecluster.c deleted file mode 100644 index 68951a0..0000000 --- a/sys/dev/raidframe/rf_chaindecluster.c +++ /dev/null @@ -1,292 +0,0 @@ -/* $NetBSD: rf_chaindecluster.c,v 1.6 2001/01/26 04:27:16 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * - * rf_chaindecluster.c -- implements chained declustering - * - *****************************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if (RF_INCLUDE_CHAINDECLUSTER > 0) - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_chaindecluster.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> - -typedef struct RF_ChaindeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_ChaindeclusterConfigInfo_t; - -int -rf_ConfigureChainDecluster( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t num_used_stripeUnitsPerDisk; - RF_ChaindeclusterConfigInfo_t *info; - RF_RowCol_t i; - - /* create a Chained Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* fill in the config structure. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < raidPtr->numCol; i++) { - info->stripeIdentifier[i][0] = i % raidPtr->numCol; - info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol; - } - - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2 * raidPtr->numCol - 2)); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2); - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - - layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; - - raidPtr->sectorsPerDisk = - num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = - (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsChainDecluster(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - /* - * The layout uses two stripe units per disk as spare within each - * sparing region. - */ - return (2 * info->numSparingRegions); -} - - -/* Maps to the primary copy of the data, i.e. the first mirror pair */ -void -rf_MapSectorChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_SectorNum_t index_within_region, index_within_disk; - RF_StripeNum_t sparing_region_id; - int col_before_remap; - - *row = 0; - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - index_within_disk = index_within_region / raidPtr->numCol; - col_before_remap = SUID % raidPtr->numCol; - - if (!remap) { - *col = col_before_remap; - *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap sector to spare space... */ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - index_within_disk = index_within_region / raidPtr->numCol; - if (index_within_disk < col_before_remap) - *col = index_within_disk; - else - if (index_within_disk == raidPtr->numCol - 2) { - *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol; - *diskSector += raidPtr->Layout.sectorsPerStripeUnit; - } else - *col = (index_within_disk + 2) % raidPtr->numCol; - } - -} - - - -/* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained - in the next disk (mod numCol) after the disk containing the primary copy. - The offset into the disk is one-half disk down */ -void -rf_MapParityChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_SectorNum_t index_within_region, index_within_disk; - RF_StripeNum_t sparing_region_id; - int col_before_remap; - - *row = 0; - if (!remap) { - *col = SUID % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap parity to spare space ... */ - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - index_within_disk = index_within_region / raidPtr->numCol; - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - col_before_remap = SUID % raidPtr->numCol; - if (index_within_disk < col_before_remap) - *col = index_within_disk; - else - if (index_within_disk == raidPtr->numCol - 2) { - *col = (col_before_remap + 2) % raidPtr->numCol; - *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; - } else - *col = (index_within_disk + 2) % raidPtr->numCol; - } - -} - -void -rf_IdentifyStripeChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID; - RF_RowCol_t col; - - SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; - col = SUID % raidPtr->numCol; - *outRow = 0; - *diskids = info->stripeIdentifier[col]; -} - -void -rf_MapSIDToPSIDChainDecluster( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -/****************************************************************************** - * select a graph to perform a single-stripe access - * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - function to use to create the graph (return value) - *****************************************************************************/ - -void -rf_RAIDCDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -#if 0 - void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, - RF_AllocListElem_t *) -#endif -{ - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - RF_ASSERT(raidPtr->numRow == 1); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; - - if (type == RF_IO_TYPE_READ) { - if ((raidPtr->status[0] == rf_rs_degraded) || (raidPtr->status[0] == rf_rs_reconstructing)) - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is - * degraded, implement - * workload shifting */ - else - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not - * degraded, so use - * mirror partition dag */ - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; -} -#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ diff --git a/sys/dev/raidframe/rf_chaindecluster.h b/sys/dev/raidframe/rf_chaindecluster.h deleted file mode 100644 index 6030289..0000000 --- a/sys/dev/raidframe/rf_chaindecluster.h +++ /dev/null @@ -1,68 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_chaindecluster.h,v 1.4 2001/01/26 04:14:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_chaindecluster.h - * header file for Chained Declustering - */ - - -#ifndef _RF__RF_CHAINDECLUSTER_H_ -#define _RF__RF_CHAINDECLUSTER_H_ - -int -rf_ConfigureChainDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t * raidPtr); -void -rf_MapSectorChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAIDCDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr *); -#if 0 -void (**createFunc) (RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, - void *, - RF_RaidAccessFlags_t, - RF_AllocListElem_t *); -#endif - -#endif /* !_RF__RF_CHAINDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_configure.h b/sys/dev/raidframe/rf_configure.h deleted file mode 100644 index c51b8a3..0000000 --- a/sys/dev/raidframe/rf_configure.h +++ /dev/null @@ -1,99 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_configure.h,v 1.4 1999/03/02 03:18:49 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************** - * - * rf_configure.h - * - * header file for raidframe configuration in the kernel version only. - * configuration is invoked via ioctl rather than at boot time - * - *******************************/ - - -#ifndef _RF__RF_CONFIGURE_H_ -#define _RF__RF_CONFIGURE_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> - -#include <sys/param.h> -#include <sys/proc.h> - -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif - -/* the raidframe configuration, passed down through an ioctl. - * the driver can be reconfigured (with total loss of data) at any time, - * but it must be shut down first. - */ -struct RF_Config_s { - RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns, - * and spare disks */ - dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks - * comprising array */ - char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names */ - dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare - * disks */ - char spare_names[RF_MAXSPARE][50]; /* device names */ - RF_SectorNum_t sectPerSU; /* sectors per stripe unit */ - RF_StripeNum_t SUsPerPU;/* stripe units per parity unit */ - RF_StripeNum_t SUsPerRU;/* stripe units per reconstruction unit */ - RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to - * be used */ - RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan, - * not used in kernel */ - char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a - * disk. not used in kernel. */ - char debugVars[RF_MAXDBGV][50]; /* space for specifying debug - * variables & their values */ - unsigned int layoutSpecificSize; /* size in bytes of - * layout-specific info */ - void *layoutSpecific; /* a pointer to a layout-specific structure to - * be copied in */ - int force; /* if !0, ignore many fatal - configuration conditions */ - /* - "force" is used to override cases where the component labels would - indicate that configuration should not proceed without user - intervention - */ -}; -#ifndef _KERNEL -int rf_MakeConfig(char *configname, RF_Config_t * cfgPtr); -int rf_MakeLayoutSpecificNULL(FILE * fp, RF_Config_t * cfgPtr, void *arg); -int rf_MakeLayoutSpecificDeclustered(FILE * configfp, RF_Config_t * cfgPtr, void *arg); -void *rf_ReadSpareTable(RF_SparetWait_t * req, char *fname); -#endif /* !_KERNEL */ - -#endif /* !_RF__RF_CONFIGURE_H_ */ diff --git a/sys/dev/raidframe/rf_copyback.c b/sys/dev/raidframe/rf_copyback.c deleted file mode 100644 index eb16404..0000000 --- a/sys/dev/raidframe/rf_copyback.c +++ /dev/null @@ -1,433 +0,0 @@ -/* $NetBSD: rf_copyback.c,v 1.15 2001/01/26 02:16:24 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * copyback.c -- code to copy reconstructed data back from spare space to - * the replaced disk. - * - * the code operates using callbacks on the I/Os to continue with the next - * unit to be copied back. We do this because a simple loop containing blocking I/Os - * will not work in the simulator. - * - ****************************************************************************************/ - -#include <dev/raidframe/rf_types.h> - -#if defined(__FreeBSD__) -#include <sys/types.h> -#include <sys/systm.h> -#if __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#endif - -#include <sys/time.h> -#include <sys/buf.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_copyback.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -#define RF_COPYBACK_DATA 0 -#define RF_COPYBACK_PARITY 1 - -int rf_copyback_in_progress; - -static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status); -static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status); -static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ, - RF_RaidAddr_t addr, RF_RowCol_t testRow, - RF_RowCol_t testCol, - RF_SectorNum_t testOffs); -static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status); - -int -rf_ConfigureCopyback(listp) - RF_ShutdownList_t **listp; -{ - rf_copyback_in_progress = 0; - return (0); -} -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - -/* do a complete copyback */ -void -rf_CopybackReconstructedData(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ComponentLabel_t *c_label; - int done, retcode; - RF_CopybackDesc_t *desc; - RF_RowCol_t frow, fcol; - RF_RaidDisk_t *badDisk; - struct vnode *vp; - char *databuf; - int ac; - - RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *)); - if (c_label == NULL) { - printf("rf_CopybackReconstructedData: Out of memory?\n"); - return; - } - - done = 0; - fcol = 0; - for (frow = 0; frow < raidPtr->numRow; frow++) { - for (fcol = 0; fcol < raidPtr->numCol; fcol++) { - if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared - || raidPtr->Disks[frow][fcol].status == rf_ds_spared) { - done = 1; - break; - } - } - if (done) - break; - } - - if (frow == raidPtr->numRow) { - printf("COPYBACK: no disks need copyback\n"); - return; - } - badDisk = &raidPtr->Disks[frow][fcol]; - - /* This device may have been opened successfully the first time. Close - * it before trying to open it again.. */ - - if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { - printf("Closed the open device: %s\n", - raidPtr->Disks[frow][fcol].devname); - vp = raidPtr->raid_cinfo[frow][fcol].ci_vp; - ac = raidPtr->Disks[frow][fcol].auto_configured; - rf_close_component(raidPtr, vp, ac); - raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL; - - } - /* note that this disk was *not* auto_configured (any longer) */ - raidPtr->Disks[frow][fcol].auto_configured = 0; - - printf("About to (re-)open the device: %s\n", - raidPtr->Disks[frow][fcol].devname); - - retcode = raid_getcomponentsize(raidPtr, frow, fcol); - - if (retcode) { - printf("COPYBACK: raidlookup on device: %s failed: %d!\n", - raidPtr->Disks[frow][fcol].devname, retcode); - - /* XXX the component isn't responding properly... must be - * still dead :-( */ - return; - - } -#if 0 - /* This is the way it was done before the CAM stuff was removed */ - - if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) { - printf("COPYBACK: unable to extract bus, target, lun from devname %s\n", - badDisk->devname); - return; - } - /* TUR the disk that's marked as bad to be sure that it's actually - * alive */ - rf_SCSI_AllocTUR(&tur_op); - retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev); - rf_SCSI_FreeDiskOp(tur_op, 0); -#endif - - if (retcode) { - printf("COPYBACK: target disk failed TUR\n"); - return; - } - /* get a buffer to hold one SU */ - RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *)); - - /* create a descriptor */ - RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *)); - desc->raidPtr = raidPtr; - desc->status = 0; - desc->frow = frow; - desc->fcol = fcol; - desc->spRow = badDisk->spareRow; - desc->spCol = badDisk->spareCol; - desc->stripeAddr = 0; - desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol; - desc->databuf = databuf; - desc->mcpair = rf_AllocMCPair(); - - printf("COPYBACK: Quiescing the array\n"); - /* quiesce the array, since we don't want to code support for user - * accs here */ - rf_SuspendNewRequestsAndWait(raidPtr); - - /* adjust state of the array and of the disks */ - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal; - raidPtr->status[desc->frow] = rf_rs_optimal; - rf_copyback_in_progress = 1; /* debug only */ - RF_UNLOCK_MUTEX(raidPtr->mutex); - - printf("COPYBACK: Beginning\n"); - RF_GETTIME(desc->starttime); - rf_ContinueCopyback(desc); - - /* Data has been restored. Fix up the component label. */ - /* Don't actually need the read here.. */ - raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, - raidPtr->raid_cinfo[frow][fcol].ci_vp, - c_label); - - raid_init_component_label( raidPtr, c_label ); - - c_label->row = frow; - c_label->column = fcol; - c_label->partitionSize = raidPtr->Disks[frow][fcol].partitionSize; - - raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, - raidPtr->raid_cinfo[frow][fcol].ci_vp, - c_label); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); -} - - -/* - * invoked via callback after a copyback I/O has completed to - * continue on with the next one - */ -void -rf_ContinueCopyback(desc) - RF_CopybackDesc_t *desc; -{ - RF_SectorNum_t testOffs, stripeAddr; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_RaidAddr_t addr; - RF_RowCol_t testRow, testCol; - int old_pctg, new_pctg, done; - struct timeval t, diff; - - old_pctg = (-1); - while (1) { - stripeAddr = desc->stripeAddr; - desc->raidPtr->copyback_stripes_done = stripeAddr - / desc->sectPerStripe; - if (rf_prReconSched) { - old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; - } - desc->stripeAddr += desc->sectPerStripe; - if (rf_prReconSched) { - new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; - if (new_pctg != old_pctg) { - RF_GETTIME(t); - RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); - } - } - if (stripeAddr >= raidPtr->totalSectors) { - rf_CopybackComplete(desc, 0); - return; - } - /* walk through the current stripe, su-by-su */ - for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) { - - /* map the SU, disallowing remap to spare space */ - (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); - - if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs); - done = 1; - break; - } - } - - if (!done) { - /* we didn't find the failed disk in the data part. - * check parity. */ - - /* map the parity for this stripe, disallowing remap - * to spare space */ - (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); - - if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs); - } - } - /* check to see if the last read/write pair failed */ - if (desc->status) { - rf_CopybackComplete(desc, 1); - return; - } - /* we didn't find any units to copy back in this stripe. - * Continue with the next one */ - } -} - - -/* copyback one unit */ -static void -rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) - RF_CopybackDesc_t *desc; - int typ; - RF_RaidAddr_t addr; - RF_RowCol_t testRow; - RF_RowCol_t testCol; - RF_SectorNum_t testOffs; -{ - RF_SectorCount_t sectPerSU = desc->sectPerSU; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_RowCol_t spRow = desc->spRow; - RF_RowCol_t spCol = desc->spCol; - RF_SectorNum_t spOffs; - - /* find the spare spare location for this SU */ - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - if (typ == RF_COPYBACK_DATA) - raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); - else - raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); - } else { - spOffs = testOffs; - } - - /* create reqs to read the old location & write the new */ - desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*) (void *, int)) rf_CopybackReadDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); - desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); - desc->frow = testRow; - desc->fcol = testCol; - - /* enqueue the read. the write will go out as part of the callback on - * the read. at user-level & in the kernel, wait for the read-write - * pair to complete. in the simulator, just return, since everything - * will happen as callbacks */ - - RF_LOCK_MUTEX(desc->mcpair->mutex); - desc->mcpair->flag = 0; - - rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY); - - while (!desc->mcpair->flag) { - RF_WAIT_MCPAIR(desc->mcpair); - } - RF_UNLOCK_MUTEX(desc->mcpair->mutex); - rf_FreeDiskQueueData(desc->readreq); - rf_FreeDiskQueueData(desc->writereq); - -} - - -/* called at interrupt context when the read has completed. just send out the write */ -static int -rf_CopybackReadDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; -{ - if (status) { /* invoke the callback with bad status */ - printf("COPYBACK: copyback read failed. Aborting.\n"); - (desc->writereq->CompleteFunc) (desc, -100); - } else { - rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY); - } - return (0); -} -/* called at interrupt context when the write has completed. - * at user level & in the kernel, wake up the copyback thread. - * in the simulator, invoke the next copyback directly. - * can't free diskqueuedata structs in the kernel b/c we're at interrupt context. - */ -static int -rf_CopybackWriteDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; -{ - if (status && status != -100) { - printf("COPYBACK: copyback write failed. Aborting.\n"); - } - desc->status = status; - rf_MCPairWakeupFunc(desc->mcpair); - return (0); -} -/* invoked when the copyback has completed */ -static void -rf_CopybackComplete(desc, status) - RF_CopybackDesc_t *desc; - int status; -{ - RF_Raid_t *raidPtr = desc->raidPtr; - struct timeval t, diff; - - if (!status) { - RF_LOCK_MUTEX(raidPtr->mutex); - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D'); - rf_FreeSpareTable(raidPtr); - } else { - raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare; - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - - RF_GETTIME(t); - RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("Copyback time was %d.%06d seconds\n", - (int) diff.tv_sec, (int) diff.tv_usec); - } else - printf("COPYBACK: Failure.\n"); - - RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU)); - rf_FreeMCPair(desc->mcpair); - RF_Free(desc, sizeof(*desc)); - - rf_copyback_in_progress = 0; - rf_ResumeNewRequests(raidPtr); -} diff --git a/sys/dev/raidframe/rf_copyback.h b/sys/dev/raidframe/rf_copyback.h deleted file mode 100644 index 67da842..0000000 --- a/sys/dev/raidframe/rf_copyback.h +++ /dev/null @@ -1,61 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_copyback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ -/* - * rf_copyback.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_COPYBACK_H_ -#define _RF__RF_COPYBACK_H_ - -#include <dev/raidframe/rf_types.h> - -typedef struct RF_CopybackDesc_s { - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; - RF_RowCol_t spRow; - RF_RowCol_t spCol; - int status; - RF_StripeNum_t stripeAddr; - RF_SectorCount_t sectPerSU; - RF_SectorCount_t sectPerStripe; - char *databuf; - RF_DiskQueueData_t *readreq; - RF_DiskQueueData_t *writereq; - struct timeval starttime; - RF_MCPair_t *mcpair; -} RF_CopybackDesc_t; - -extern int rf_copyback_in_progress; - -int rf_ConfigureCopyback(RF_ShutdownList_t ** listp); -void rf_CopybackReconstructedData(RF_Raid_t * raidPtr); -void rf_ContinueCopyback(RF_CopybackDesc_t * desc); - -#endif /* !_RF__RF_COPYBACK_H_ */ diff --git a/sys/dev/raidframe/rf_cvscan.c b/sys/dev/raidframe/rf_cvscan.c deleted file mode 100644 index b7c1026..0000000 --- a/sys/dev/raidframe/rf_cvscan.c +++ /dev/null @@ -1,441 +0,0 @@ -/* $NetBSD: rf_cvscan.c,v 1.5 1999/08/13 03:41:53 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************************* - * - * cvscan.c -- prioritized cvscan disk queueing code. - * - * Nov 9, 1994, adapted from raidSim version (MCH) - * - ******************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_cvscan.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> - -#define DO_CHECK_STATE(_hdr_) CheckCvscanState((_hdr_), __FILE__, __LINE__) - -#define pri_ok(p) ( ((p) == RF_IO_NORMAL_PRIORITY) || ((p) == RF_IO_LOW_PRIORITY)) - -static void -CheckCvscanState(RF_CvscanHeader_t * hdr, char *file, int line) -{ - long i, key; - RF_DiskQueueData_t *tmp; - - if (hdr->left != (RF_DiskQueueData_t *) NULL) - RF_ASSERT(hdr->left->sectorOffset < hdr->cur_block); - for (key = hdr->cur_block, i = 0, tmp = hdr->left; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->sectorOffset, i++, tmp = tmp->next) - RF_ASSERT(tmp->sectorOffset <= key - && tmp->priority == hdr->nxt_priority && pri_ok(tmp->priority)); - RF_ASSERT(i == hdr->left_cnt); - - for (key = hdr->cur_block, i = 0, tmp = hdr->right; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->sectorOffset, i++, tmp = tmp->next) { - RF_ASSERT(key <= tmp->sectorOffset); - RF_ASSERT(tmp->priority == hdr->nxt_priority); - RF_ASSERT(pri_ok(tmp->priority)); - } - RF_ASSERT(i == hdr->right_cnt); - - for (key = hdr->nxt_priority - 1, tmp = hdr->burner; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->priority, tmp = tmp->next) { - RF_ASSERT(tmp); - RF_ASSERT(hdr); - RF_ASSERT(pri_ok(tmp->priority)); - RF_ASSERT(key >= tmp->priority); - RF_ASSERT(tmp->priority < hdr->nxt_priority); - } -} - - - -static void -PriorityInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req) -{ - /* * insert block pointed to by req in to list whose first * entry is - * pointed to by the pointer that list_ptr points to * ie., list_ptr - * is a grandparent of the first entry */ - - for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && - (*list_ptr)->priority > req->priority; - list_ptr = &((*list_ptr)->next)) { - } - req->next = (*list_ptr); - (*list_ptr) = req; -} - - - -static void -ReqInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req, RF_CvscanArmDir_t order) -{ - /* * insert block pointed to by req in to list whose first * entry is - * pointed to by the pointer that list_ptr points to * ie., list_ptr - * is a grandparent of the first entry */ - - for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && - - ((order == rf_cvscan_RIGHT && (*list_ptr)->sectorOffset <= req->sectorOffset) - || (order == rf_cvscan_LEFT && (*list_ptr)->sectorOffset > req->sectorOffset)); - list_ptr = &((*list_ptr)->next)) { - } - req->next = (*list_ptr); - (*list_ptr) = req; -} - - - -static RF_DiskQueueData_t * -ReqDequeue(RF_DiskQueueData_t ** list_ptr) -{ - RF_DiskQueueData_t *ret = (*list_ptr); - if ((*list_ptr) != (RF_DiskQueueData_t *) NULL) { - (*list_ptr) = (*list_ptr)->next; - } - return (ret); -} - - - -static void -ReBalance(RF_CvscanHeader_t * hdr) -{ - /* DO_CHECK_STATE(hdr); */ - while (hdr->right != (RF_DiskQueueData_t *) NULL - && hdr->right->sectorOffset < hdr->cur_block) { - hdr->right_cnt--; - hdr->left_cnt++; - ReqInsert(&hdr->left, ReqDequeue(&hdr->right), rf_cvscan_LEFT); - } - /* DO_CHECK_STATE(hdr); */ -} - - - -static void -Transfer(RF_DiskQueueData_t ** to_list_ptr, RF_DiskQueueData_t ** from_list_ptr) -{ - RF_DiskQueueData_t *gp; - for (gp = (*from_list_ptr); gp != (RF_DiskQueueData_t *) NULL;) { - RF_DiskQueueData_t *p = gp->next; - PriorityInsert(to_list_ptr, gp); - gp = p; - } - (*from_list_ptr) = (RF_DiskQueueData_t *) NULL; -} - - - -static void -RealEnqueue(RF_CvscanHeader_t * hdr, RF_DiskQueueData_t * req) -{ - RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || req->priority == RF_IO_LOW_PRIORITY); - - DO_CHECK_STATE(hdr); - if (hdr->left_cnt == 0 && hdr->right_cnt == 0) { - hdr->nxt_priority = req->priority; - } - if (req->priority > hdr->nxt_priority) { - /* - ** dump all other outstanding requests on the back burner - */ - Transfer(&hdr->burner, &hdr->left); - Transfer(&hdr->burner, &hdr->right); - hdr->left_cnt = 0; - hdr->right_cnt = 0; - hdr->nxt_priority = req->priority; - } - if (req->priority < hdr->nxt_priority) { - /* - ** yet another low priority task! - */ - PriorityInsert(&hdr->burner, req); - } else { - if (req->sectorOffset < hdr->cur_block) { - /* this request is to the left of the current arms */ - ReqInsert(&hdr->left, req, rf_cvscan_LEFT); - hdr->left_cnt++; - } else { - /* this request is to the right of the current arms */ - ReqInsert(&hdr->right, req, rf_cvscan_RIGHT); - hdr->right_cnt++; - } - } - DO_CHECK_STATE(hdr); -} - - - -void -rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t * elem, int priority) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - RealEnqueue(hdr, elem /* req */ ); -} - - - -RF_DiskQueueData_t * -rf_CvscanDequeue(void *q_in) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - long range, i, sum_dist_left, sum_dist_right; - RF_DiskQueueData_t *ret; - RF_DiskQueueData_t *tmp; - - DO_CHECK_STATE(hdr); - - if (hdr->left_cnt == 0 && hdr->right_cnt == 0) - return ((RF_DiskQueueData_t *) NULL); - - range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); - for (i = 0, tmp = hdr->left, sum_dist_left = - ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_left += hdr->cur_block - tmp->sectorOffset; - } - for (i = 0, tmp = hdr->right, sum_dist_right = - ((hdr->direction == rf_cvscan_LEFT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_right += tmp->sectorOffset - hdr->cur_block; - } - - if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right) { - hdr->direction = rf_cvscan_LEFT; - hdr->cur_block = hdr->left->sectorOffset + hdr->left->numSector; - hdr->left_cnt = RF_MAX(hdr->left_cnt - 1, 0); - tmp = hdr->left; - ret = (ReqDequeue(&hdr->left)) /*->parent*/ ; - } else { - hdr->direction = rf_cvscan_RIGHT; - hdr->cur_block = hdr->right->sectorOffset + hdr->right->numSector; - hdr->right_cnt = RF_MAX(hdr->right_cnt - 1, 0); - tmp = hdr->right; - ret = (ReqDequeue(&hdr->right)) /*->parent*/ ; - } - ReBalance(hdr); - - if (hdr->left_cnt == 0 && hdr->right_cnt == 0 - && hdr->burner != (RF_DiskQueueData_t *) NULL) { - /* - ** restore low priority requests for next dequeue - */ - RF_DiskQueueData_t *burner = hdr->burner; - hdr->nxt_priority = burner->priority; - while (burner != (RF_DiskQueueData_t *) NULL - && burner->priority == hdr->nxt_priority) { - RF_DiskQueueData_t *next = burner->next; - RealEnqueue(hdr, burner); - burner = next; - } - hdr->burner = burner; - } - DO_CHECK_STATE(hdr); - return (ret); -} - - - -RF_DiskQueueData_t * -rf_CvscanPeek(void *q_in) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - long range, i, sum_dist_left, sum_dist_right; - RF_DiskQueueData_t *tmp, *headElement; - - DO_CHECK_STATE(hdr); - - if (hdr->left_cnt == 0 && hdr->right_cnt == 0) - headElement = NULL; - else { - range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); - for (i = 0, tmp = hdr->left, sum_dist_left = - ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_left += hdr->cur_block - tmp->sectorOffset; - } - for (i = 0, tmp = hdr->right, sum_dist_right = - ((hdr->direction == rf_cvscan_LEFT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { - sum_dist_right += tmp->sectorOffset - hdr->cur_block; - } - - if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right) - headElement = hdr->left; - else - headElement = hdr->right; - } - return (headElement); -} - - - -/* -** CVSCAN( 1, 0 ) is Shortest Seek Time First (SSTF) -** lowest average response time -** CVSCAN( 1, infinity ) is SCAN -** lowest response time standard deviation -*/ - - -int -rf_CvscanConfigure() -{ - return (0); -} - - - -void * -rf_CvscanCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t * clList, - RF_ShutdownList_t ** listp) -{ - RF_CvscanHeader_t *hdr; - long range = 2; /* Currently no mechanism to change these */ - long penalty = sectPerDisk / 5; - - RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), clList); - bzero((char *) hdr, sizeof(RF_CvscanHeader_t)); - hdr->range_for_avg = RF_MAX(range, 1); - hdr->change_penalty = RF_MAX(penalty, 0); - hdr->direction = rf_cvscan_RIGHT; - hdr->cur_block = 0; - hdr->left_cnt = hdr->right_cnt = 0; - hdr->left = hdr->right = (RF_DiskQueueData_t *) NULL; - hdr->burner = (RF_DiskQueueData_t *) NULL; - DO_CHECK_STATE(hdr); - - return ((void *) hdr); -} - - -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) -/* PrintCvscanQueue is not used, so we ignore it... */ -#else -static void -PrintCvscanQueue(RF_CvscanHeader_t * hdr) -{ - RF_DiskQueueData_t *tmp; - - printf("CVSCAN(%d,%d) at %d going %s\n", - (int) hdr->range_for_avg, - (int) hdr->change_penalty, - (int) hdr->cur_block, - (hdr->direction == rf_cvscan_LEFT) ? "LEFT" : "RIGHT"); - printf("\tLeft(%d): ", hdr->left_cnt); - for (tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf("(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority); - printf("\n"); - printf("\tRight(%d): ", hdr->right_cnt); - for (tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf("(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority); - printf("\n"); - printf("\tBurner: "); - for (tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf("(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority); - printf("\n"); -} -#endif - - -/* promotes reconstruction accesses for the given stripeID to normal priority. - * returns 1 if an access was found and zero otherwise. Normally, we should - * only have one or zero entries in the burner queue, so execution time should - * be short. - */ -int -rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru) -{ - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - RF_DiskQueueData_t *trailer = NULL, *tmp = hdr->burner, *tlist = NULL; - int retval = 0; - - DO_CHECK_STATE(hdr); - while (tmp) { /* handle entries at the front of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { - hdr->burner = tmp->next; - tmp->priority = RF_IO_NORMAL_PRIORITY; - tmp->next = tlist; - tlist = tmp; - tmp = hdr->burner; - } else - break; - } - if (tmp) { - trailer = tmp; - tmp = tmp->next; - } - while (tmp) { /* handle entries on the rest of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { - trailer->next = tmp->next; - tmp->priority = RF_IO_NORMAL_PRIORITY; - tmp->next = tlist; - tlist = tmp; /* insert on a temp queue */ - tmp = trailer->next; - } else { - trailer = tmp; - tmp = tmp->next; - } - } - while (tlist) { - retval++; - tmp = tlist->next; - RealEnqueue(hdr, tlist); - tlist = tmp; - } - RF_ASSERT(retval == 0 || retval == 1); - DO_CHECK_STATE((RF_CvscanHeader_t *) q_in); - return (retval); -} diff --git a/sys/dev/raidframe/rf_cvscan.h b/sys/dev/raidframe/rf_cvscan.h deleted file mode 100644 index 7f536a8..0000000 --- a/sys/dev/raidframe/rf_cvscan.h +++ /dev/null @@ -1,85 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_cvscan.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* -** Disk scheduling by CVSCAN( N, r ) -** -** Given a set of requests, partition them into one set on each -** side of the current arm position. The trick is to pick which -** side you are going to service next; once a side is picked you will -** service the closest request. -** Let there be n1 requests on one side and n2 requests on the other -** side. If one of n1 or n2 is zero, select the other side. -** If both n1 and n2 are nonzero, select a "range" for examination -** that is N' = min( n1, n2, N ). Average the distance from the -** current position to the nearest N' requests on each side giving -** d1 and d2. -** Suppose the last decision was to move toward set 2, then the -** current direction is toward set 2, and you will only switch to set -** 1 if d1+R < d2 where R is r*(total number of cylinders), r in [0,1]. -** -** I extend this by applying only to the set of requests that all -** share the same, highest priority level. -*/ - -#ifndef _RF__RF_CVSCAN_H_ -#define _RF__RF_CVSCAN_H_ - -#include <dev/raidframe/rf_diskqueue.h> - -typedef enum RF_CvscanArmDir_e { - rf_cvscan_LEFT, - rf_cvscan_RIGHT -} RF_CvscanArmDir_t; - -typedef struct RF_CvscanHeader_s { - long range_for_avg; /* CVSCAN param N */ - long change_penalty; /* CVSCAN param R */ - RF_CvscanArmDir_t direction; - RF_SectorNum_t cur_block; - int nxt_priority; - RF_DiskQueueData_t *left; - int left_cnt; - RF_DiskQueueData_t *right; - int right_cnt; - RF_DiskQueueData_t *burner; -} RF_CvscanHeader_t; - -int rf_CvscanConfigure(void); -void * -rf_CvscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); -RF_DiskQueueData_t *rf_CvscanDequeue(void *qptr); -RF_DiskQueueData_t *rf_CvscanPeek(void *qptr); -int -rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -#endif /* !_RF__RF_CVSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_dag.h b/sys/dev/raidframe/rf_dag.h deleted file mode 100644 index 15cd4a8..0000000 --- a/sys/dev/raidframe/rf_dag.h +++ /dev/null @@ -1,239 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dag.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II, Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * * - * dag.h -- header file for DAG-related data structures * - * * - ****************************************************************************/ - -#ifndef _RF__RF_DAG_H_ -#define _RF__RF_DAG_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_dagflags.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_memchunk.h> - -#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */ -#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */ -#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may posses */ - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#include <sys/buf.h> - -struct RF_PropHeader_s { /* structure for propagation of results */ - int resultNum; /* bind result # resultNum */ - int paramNum; /* to parameter # paramNum */ - RF_PropHeader_t *next; /* linked list for multiple results/params */ -}; - -typedef enum RF_NodeStatus_e { - rf_bwd1, /* node is ready for undo logging (backward - * error recovery only) */ - rf_bwd2, /* node has completed undo logging (backward - * error recovery only) */ - rf_wait, /* node is waiting to be executed */ - rf_fired, /* node is currently executing its do function */ - rf_good, /* node successfully completed execution of - * its do function */ - rf_bad, /* node failed to successfully execute its do - * function */ - rf_skipped, /* not used anymore, used to imply a node was - * not executed */ - rf_recover, /* node is currently executing its undo - * function */ - rf_panic, /* node failed to successfully execute its - * undo function */ - rf_undone /* node successfully executed its undo - * function */ -} RF_NodeStatus_t; -/* - * These were used to control skipping a node. - * Now, these are only used as comments. - */ -typedef enum RF_AntecedentType_e { - rf_trueData, - rf_antiData, - rf_outputData, - rf_control -} RF_AntecedentType_t; -#define RF_DAG_PTRCACHESIZE 40 -#define RF_DAG_PARAMCACHESIZE 12 - -typedef RF_uint8 RF_DagNodeFlags_t; - -struct RF_DagNode_s { - RF_NodeStatus_t status; /* current status of this node */ - int (*doFunc) (RF_DagNode_t *); /* normal function */ - int (*undoFunc) (RF_DagNode_t *); /* func to remove effect of - * doFunc */ - int (*wakeFunc) (RF_DagNode_t *, int status); /* func called when the - * node completes an I/O */ - int numParams; /* number of parameters required by *funcPtr */ - int numResults; /* number of results produced by *funcPtr */ - int numAntecedents; /* number of antecedents */ - int numAntDone; /* number of antecedents which have finished */ - int numSuccedents; /* number of succedents */ - int numSuccFired; /* incremented when a succedent is fired - * during forward execution */ - int numSuccDone; /* incremented when a succedent finishes - * during rollBackward */ - int commitNode; /* boolean flag - if true, this is a commit - * node */ - RF_DagNode_t **succedents; /* succedents, array size - * numSuccedents */ - RF_DagNode_t **antecedents; /* antecedents, array size - * numAntecedents */ - RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each - * antecedent */ - void **results; /* array of results produced by *funcPtr */ - RF_DagParam_t *params; /* array of parameters required by *funcPtr */ - RF_PropHeader_t **propList; /* propagation list, size - * numSuccedents */ - RF_DagHeader_t *dagHdr; /* ptr to head of dag containing this node */ - void *dagFuncData; /* dag execution func uses this for whatever - * it wants */ - RF_DagNode_t *next; - int nodeNum; /* used by PrintDAG for debug only */ - int visited; /* used to avoid re-visiting nodes on DAG - * walks */ - /* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY THAT AFTER - * IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL */ - char *name; /* debug only */ - RF_DagNodeFlags_t flags;/* see below */ - RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance */ - RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance */ -}; -/* - * Bit values for flags field of RF_DagNode_t - */ -#define RF_DAGNODE_FLAG_NONE 0x00 -#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor - * before firing this node */ - -/* enable - DAG ready for normal execution, no errors encountered - * rollForward - DAG encountered an error after commit point, rolling forward - * rollBackward - DAG encountered an error prior to commit point, rolling backward - */ -typedef enum RF_DagStatus_e { - rf_enable, - rf_rollForward, - rf_rollBackward -} RF_DagStatus_t; -#define RF_MAX_HDR_SUCC 1 - -#define RF_MAXCHUNKS 10 - -struct RF_DagHeader_s { - RF_DagStatus_t status; /* status of this DAG */ - int numSuccedents; /* DAG may be a tree, i.e. may have > 1 root */ - int numCommitNodes; /* number of commit nodes in graph */ - int numCommits; /* number of commit nodes which have been - * fired */ - RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents, - * size numSuccedents */ - RF_DagHeader_t *next; /* ptr to allow a list of dags */ - RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed - * prior to freeing DAG */ - RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps - * to be freed */ - int nodeNum; /* used by PrintDAG for debug only */ - int numNodesCompleted; - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - - void (*cbFunc) (void *); /* function to call when the dag - * completes */ - void *cbArg; /* argument for cbFunc */ - char *creator; /* name of function used to create this dag */ - - RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG - * is for */ - void *bp; /* the bp for this I/O passed down from the - * file system. ignored outside kernel */ - - RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of - * memory to be retained upon - * DAG free for re-use */ - int chunkIndex; /* the idea is to avoid calls to alloc and - * free */ - - RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows - * SelectAlgorithm to merge memChunks - * from several dags */ - int xtraChunkIndex; /* number of ptrs to valid chunks */ - int xtraChunkCnt; /* number of ptrs to chunks allocated */ - -}; - -struct RF_DagList_s { - /* common info for a list of dags which will be fired sequentially */ - int numDags; /* number of dags in the list */ - int numDagsFired; /* number of dags in list which have initiated - * execution */ - int numDagsDone; /* number of dags in list which have completed - * execution */ - RF_DagHeader_t *dags; /* list of dags */ - RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */ - RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user - * info) */ -}; -/* resets a node so that it can be fired again */ -#define RF_ResetNode(_n_) { \ - (_n_)->status = rf_wait; \ - (_n_)->numAntDone = 0; \ - (_n_)->numSuccFired = 0; \ - (_n_)->numSuccDone = 0; \ - (_n_)->next = NULL; \ -} - -#define RF_ResetDagHeader(_h_) { \ - (_h_)->numNodesCompleted = 0; \ - (_h_)->numCommits = 0; \ - (_h_)->status = rf_enable; \ -} - -/* convience macro for declaring a create dag function */ - -#define RF_CREATE_DAG_FUNC_DECL(_name_) \ -void _name_ ( \ - RF_Raid_t *raidPtr, \ - RF_AccessStripeMap_t *asmap, \ - RF_DagHeader_t *dag_h, \ - void *bp, \ - RF_RaidAccessFlags_t flags, \ - RF_AllocListElem_t *allocList) - -#endif /* !_RF__RF_DAG_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegrd.c b/sys/dev/raidframe/rf_dagdegrd.c deleted file mode 100644 index 6321db6..0000000 --- a/sys/dev/raidframe/rf_dagdegrd.c +++ /dev/null @@ -1,1132 +0,0 @@ -/* $NetBSD: rf_dagdegrd.c,v 1.7 2001/01/26 14:06:16 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagdegrd.c - * - * code for creating degraded read DAGs - */ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagdegrd.h> - - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. - * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. - * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - -void -rf_CreateRaidFiveDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorRecoveryFuncs); -} - - -/****************************************************************************** - * - * DAG creation code begins here - */ - - -/****************************************************************************** - * Create a degraded read DAG for RAID level 1 - * - * Hdr -> Nil -> R(p/s)d -> Commit -> Trm - * - * The "Rd" node reads data from the surviving disk in the mirror pair - * Rpd - read of primary copy - * Rsd - read of secondary copy - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidOneDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda; - int useMirror, i; - - useMirror = 0; - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 degraded read DAG]\n"); - } - dag_h->creator = "RaidOneDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 0) - useMirror = RF_FALSE; - else - useMirror = RF_TRUE; - - /* total number of nodes = 1 + (block + commit + terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - rdNode = &nodes[i]; - i++; - blockNode = &nodes[i]; - i++; - commitNode = &nodes[i]; - i++; - termNode = &nodes[i]; - i++; - - /* this dag can not commit until the commit node is reached. errors - * prior to the commit point imply the dag has failed and must be - * retried */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - pda = asmap->physInfo; - RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - /* initialize the data node */ - if (!useMirror) { - /* read primary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); - rdNode->params[0].p = pda; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to rdnode */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(rdNode->numAntecedents == 1); - blockNode->succedents[0] = rdNode; - rdNode->antecedents[0] = blockNode; - rdNode->antType[0] = rf_control; - - /* connect rdnode to commit node */ - RF_ASSERT(rdNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - rdNode->succedents[0] = commitNode; - commitNode->antecedents[0] = rdNode; - commitNode->antType[0] = rf_control; - - /* connect commit node to terminator */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; -} - - - -/****************************************************************************** - * - * creates a DAG to perform a degraded-mode read of data within one stripe. - * This DAG is as follows: - * - * Hdr -> Block -> Rud -> Xor -> Cmt -> T - * -> Rrd -> - * -> Rp --> - * - * Each R node is a successor of the L node - * One successor arc from each R node goes to C, and the other to X - * There is one Rud for each chunk of surviving user data requested by the - * user, and one Rrd for each chunk of surviving user data _not_ being read by - * the user - * R = read, ud = user data, rd = recovery (surviving) data, p = parity - * X = XOR, C = Commit, T = terminate - * - * The block node guarantees a single source node. - * - * Note: The target buffer for the XOR node is set to the actual user buffer - * where the failed data is supposed to end up. This buffer is zero'd by the - * code here. Thus, if you create a degraded read dag, use it, and then - * re-use, you have to be sure to zero the target buffer prior to the re-use. - * - * The recfunc argument at the end specifies the name and function used for - * the redundancy - * recovery function. - * - *****************************************************************************/ - -void -rf_CreateDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * recFunc) -{ - RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; - RF_DagNode_t *commitNode, *rpNode, *termNode; - int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; - int j, paramNum; - RF_SectorCount_t sectorsPerSU; - RF_ReconUnitNum_t which_ru; - char *overlappingPDAs;/* a temporary array of flags */ - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_PhysDiskAddr_t *pda, *parityPDA; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *failedPDA; - RF_RaidLayout_t *layoutPtr; - char *rpBuf; - - layoutPtr = &(raidPtr->Layout); - /* failedPDA points to the pda within the asm that targets the failed - * disk */ - failedPDA = asmap->failedPDAs[0]; - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, - asmap->raidAddress, &which_ru); - sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - - if (rf_dagDebug) { - printf("[Creating degraded read DAG]\n"); - } - RF_ASSERT(asmap->numDataFailed == 1); - dag_h->creator = "DegradedReadDAG"; - - /* - * generate two ASMs identifying the surviving data we need - * in order to recover the lost data - */ - - /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, - &rpBuf, overlappingPDAs, allocList); - - /* - * create all the nodes at once - * - * -1 because no access is generated for the failed pda - */ - nRudNodes = asmap->numStripeUnitsAccessed - 1; - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); - nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, - * Rrd */ - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), - allocList); - i = 0; - blockNode = &nodes[i]; - i++; - commitNode = &nodes[i]; - i++; - xorNode = &nodes[i]; - i++; - rpNode = &nodes[i]; - i++; - termNode = &nodes[i]; - i++; - rudNodes = &nodes[i]; - i += nRudNodes; - rrdNodes = &nodes[i]; - i += nRrdNodes; - RF_ASSERT(i == nNodes); - - /* initialize nodes */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - /* this dag can not commit until the commit node is reached errors - * prior to the commit point imply the dag has failed */ - dag_h->numSuccedents = 1; - - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, - NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, - recFunc->SimpleName, allocList); - - /* fill in the Rud nodes */ - for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { - if (pda == failedPDA) { - i--; - continue; - } - rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rud", allocList); - RF_ASSERT(pda); - rudNodes[i].params[0].p = pda; - rudNodes[i].params[1].p = pda->bufPtr; - rudNodes[i].params[2].v = parityStripeID; - rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* fill in the Rrd nodes */ - i = 0; - if (new_asm_h[0]) { - for (pda = new_asm_h[0]->stripeMap->physInfo; - i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - i++, pda = pda->next) { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i].params[0].p = pda; - rrdNodes[i].params[1].p = pda->bufPtr; - rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - if (new_asm_h[1]) { - for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; - j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - j++, pda = pda->next) { - rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i + j].params[0].p = pda; - rrdNodes[i + j].params[1].p = pda->bufPtr; - rrdNodes[i + j].params[2].v = parityStripeID; - rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - /* make a PDA for the parity unit */ - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->parityInfo->row; - parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - /* initialize the Rp node */ - rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); - rpNode->params[0].p = parityPDA; - rpNode->params[1].p = rpBuf; - rpNode->params[2].v = parityStripeID; - rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - /* - * the last and nastiest step is to assign all - * the parameters of the Xor node - */ - paramNum = 0; - for (i = 0; i < nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ - xorNode->params[paramNum++] = rrdNodes[i].params[0]; - xorNode->params[paramNum++] = rrdNodes[i].params[1]; - } - for (i = 0; i < nRudNodes; i++) { - /* any Rud nodes that overlap the failed access need to be - * xored in */ - if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *) rudNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); - xorNode->params[paramNum++].p = pda; - xorNode->params[paramNum++].p = pda->bufPtr; - } - } - RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); - - /* install parity pda as last set of params to be xor'd */ - xorNode->params[paramNum++].p = parityPDA; - xorNode->params[paramNum++].p = rpBuf; - - /* - * the last 2 params to the recovery xor node are - * the failed PDA and the raidPtr - */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = raidPtr; - RF_ASSERT(paramNum == 2 * nXorBufs + 2); - - /* - * The xor node uses results[0] as the target buffer. - * Set pointer and zero the buffer. In the kernel, this - * may be a user buffer in which case we have to remap it. - */ - xorNode->results[0] = failedPDA->bufPtr; - RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr, - failedPDA->numSector)); - - /* connect nodes to form graph */ - /* connect the header to the block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect the block node to the read nodes */ - RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); - RF_ASSERT(rpNode->numAntecedents == 1); - blockNode->succedents[0] = rpNode; - rpNode->antecedents[0] = blockNode; - rpNode->antType[0] = rf_control; - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - blockNode->succedents[1 + i] = &rrdNodes[i]; - rrdNodes[i].antecedents[0] = blockNode; - rrdNodes[i].antType[0] = rf_control; - } - for (i = 0; i < nRudNodes; i++) { - RF_ASSERT(rudNodes[i].numSuccedents == 1); - blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i]; - rudNodes[i].antecedents[0] = blockNode; - rudNodes[i].antType[0] = rf_control; - } - - /* connect the read nodes to the xor node */ - RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); - RF_ASSERT(rpNode->numSuccedents == 1); - rpNode->succedents[0] = xorNode; - xorNode->antecedents[0] = rpNode; - xorNode->antType[0] = rf_trueData; - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - rrdNodes[i].succedents[0] = xorNode; - xorNode->antecedents[1 + i] = &rrdNodes[i]; - xorNode->antType[1 + i] = rf_trueData; - } - for (i = 0; i < nRudNodes; i++) { - RF_ASSERT(rudNodes[i].numSuccedents == 1); - rudNodes[i].succedents[0] = xorNode; - xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i]; - xorNode->antType[1 + nRrdNodes + i] = rf_trueData; - } - - /* connect the xor node to the commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - commitNode->antType[0] = rf_control; - - /* connect the termNode to the commit node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antType[0] = rf_control; - termNode->antecedents[0] = commitNode; -} - -#if (RF_INCLUDE_CHAINDECLUSTER > 0) -/****************************************************************************** - * Create a degraded read DAG for Chained Declustering - * - * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm - * - * The "Rd" node reads data from the surviving disk in the mirror pair - * Rpd - read of primary copy - * Rsd - read of secondary copy - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidCDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; - RF_StripeNum_t parityStripeID; - int useMirror, i, shiftable; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda; - - if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { - shiftable = RF_TRUE; - } else { - shiftable = RF_FALSE; - } - useMirror = 0; - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - - if (rf_dagDebug) { - printf("[Creating RAID C degraded read DAG]\n"); - } - dag_h->creator = "RaidCDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 0) - useMirror = RF_FALSE; - else - useMirror = RF_TRUE; - - /* total number of nodes = 1 + (block + commit + terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - rdNode = &nodes[i]; - i++; - blockNode = &nodes[i]; - i++; - commitNode = &nodes[i]; - i++; - termNode = &nodes[i]; - i++; - - /* - * This dag can not commit until the commit node is reached. - * Errors prior to the commit point imply the dag has failed - * and must be retried. - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - pda = asmap->physInfo; - RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - /* initialize the data node */ - if (!useMirror) { - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); - if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { - /* shift this read to the next disk in line */ - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } else { - /* read primary copy */ - rdNode->params[0].p = pda; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to rdnode */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(rdNode->numAntecedents == 1); - blockNode->succedents[0] = rdNode; - rdNode->antecedents[0] = blockNode; - rdNode->antType[0] = rf_control; - - /* connect rdnode to commit node */ - RF_ASSERT(rdNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - rdNode->succedents[0] = commitNode; - commitNode->antecedents[0] = rdNode; - commitNode->antType[0] = rf_control; - - /* connect commit node to terminator */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; -} -#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) -/* - * XXX move this elsewhere? - */ -void -rf_DD_GenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t ** pdap, - int *nNodep, - RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, - RF_AllocListElem_t * allocList) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk, i; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; - RF_SectorNum_t suoff, suend; - unsigned firstDataCol, napdas, count; - RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; - RF_PhysDiskAddr_t *pda_p; - RF_PhysDiskAddr_t *phys_p; - RF_RaidAddr_t sosAddr; - - /* determine how many pda's we will have to generate per unaccess - * stripe. If there is only one failed data unit, it is one; if two, - * possibly two, depending wether they overlap. */ - - fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); - fone_end = fone_start + fone->numSector; - -#define CONS_PDA(if,start,num) \ - pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ - pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ - pda_p->numSector = num; \ - pda_p->next = NULL; \ - RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) - - if (asmap->numDataFailed == 1) { - PDAPerDisk = 1; - state = 1; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - /* build p */ - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - /* build q */ - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } else { - ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); - ftwo_end = ftwo_start + ftwo->numSector; - if (fone->numSector + ftwo->numSector > secPerSU) { - PDAPerDisk = 1; - state = 2; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_Q; - } else { - PDAPerDisk = 2; - state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - pda_p++; - CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } - } - /* figure out number of nonaccessed pda */ - napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); - *nPQNodep = PDAPerDisk; - - /* sweep over the over accessed pda's, figuring out the number of - * additional pda's to generate. Of course, skip the failed ones */ - - count = 0; - for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { - if ((pda_p == fone) || (pda_p == ftwo)) - continue; - suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); - suend = suoff + pda_p->numSector; - switch (state) { - case 1: /* one failed PDA to overlap */ - /* if a PDA doesn't contain the failed unit, it can - * only miss the start or end, not both */ - if ((suoff > fone_start) || (suend < fone_end)) - count++; - break; - case 2: /* whole stripe */ - if (suoff) /* leak at begining */ - count++; - if (suend < numDataCol) /* leak at end */ - count++; - break; - case 3: /* two disjoint units */ - if ((suoff > fone_start) || (suend < fone_end)) - count++; - if ((suoff > ftwo_start) || (suend < ftwo_end)) - count++; - break; - default: - RF_PANIC(); - } - } - - napdas += count; - *nNodep = napdas; - if (napdas == 0) - return; /* short circuit */ - - /* allocate up our list of pda's */ - - RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - *pdap = pda_p; - - /* linkem together */ - for (i = 0; i < (napdas - 1); i++) - pda_p[i].next = pda_p + (i + 1); - - /* march through the one's up to the first accessed disk */ - firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < firstDataCol; i++) { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); - } - pda_p++; - } - - /* march through the touched stripe units */ - for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { - if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) - continue; - suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); - suend = suoff + phys_p->numSector; - switch (state) { - case 1: /* single buffer */ - if (suoff > fone_start) { - RF_ASSERT(suend >= fone_end); - /* The data read starts after the mapped - * access, snip off the begining */ - pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - if (suend < fone_end) { - RF_ASSERT(suoff <= fone_start); - /* The data read stops before the end of the - * failed access, extend */ - pda_p->numSector = fone_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - break; - case 2: /* whole stripe unit */ - RF_ASSERT((suoff == 0) || (suend == secPerSU)); - if (suend < secPerSU) { /* short read, snip from end - * on */ - pda_p->numSector = secPerSU - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } else - if (suoff > 0) { /* short at front */ - pda_p->numSector = suoff; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - break; - case 3: /* two nonoverlapping failures */ - if ((suoff > fone_start) || (suend < fone_end)) { - if (suoff > fone_start) { - RF_ASSERT(suend >= fone_end); - /* The data read starts after the - * mapped access, snip off the - * begining */ - pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - if (suend < fone_end) { - RF_ASSERT(suoff <= fone_start); - /* The data read stops before the end - * of the failed access, extend */ - pda_p->numSector = fone_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - } - if ((suoff > ftwo_start) || (suend < ftwo_end)) { - if (suoff > ftwo_start) { - RF_ASSERT(suend >= ftwo_end); - /* The data read starts after the - * mapped access, snip off the - * begining */ - pda_p->numSector = suoff - ftwo_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - if (suend < ftwo_end) { - RF_ASSERT(suoff <= ftwo_start); - /* The data read stops before the end - * of the failed access, extend */ - pda_p->numSector = ftwo_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - } - } - break; - default: - RF_PANIC(); - } - } - - /* after the last accessed disk */ - for (; i < numDataCol; i++) { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); - } - pda_p++; - } - - RF_ASSERT(pda_p - *pdap == napdas); - return; -} -#define INIT_DISK_NODE(node,name) \ -rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ -(node)->succedents[0] = unblockNode; \ -(node)->succedents[1] = recoveryNode; \ -(node)->antecedents[0] = blockNode; \ -(node)->antType[0] = rf_control - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -void -rf_DoubleDegRead( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - char *redundantReadNodeName, - char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, - *unblockNode, *rpNodes, *rqNodes, *termNode; - RF_PhysDiskAddr_t *pda, *pqPDAs; - RF_PhysDiskAddr_t *npdas; - int nNodes, nRrdNodes, nRudNodes, i; - RF_ReconUnitNum_t which_ru; - int nReadNodes, nPQNodes; - RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; - RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating Double Degraded Read DAG]\n"); - rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); - - nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); - nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes; - nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; - - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - recoveryNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - rudNodes = &nodes[i]; - i += nRudNodes; - rrdNodes = &nodes[i]; - i += nRrdNodes; - rpNodes = &nodes[i]; - i += nPQNodes; - rqNodes = &nodes[i]; - i += nPQNodes; - RF_ASSERT(i == nNodes); - - dag_h->numSuccedents = 1; - dag_h->succedents[0] = blockNode; - dag_h->creator = "DoubleDegRead"; - dag_h->numCommits = 0; - dag_h->numCommitNodes = 1; /* unblock */ - - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - termNode->antecedents[1] = recoveryNode; - termNode->antType[1] = rf_control; - - /* init the block and unblock nodes */ - /* The block node has all nodes except itself, unblock and recovery as - * successors. Similarly for predecessors of the unblock. */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); - - for (i = 0; i < nReadNodes; i++) { - blockNode->succedents[i] = rudNodes + i; - unblockNode->antecedents[i] = rudNodes + i; - unblockNode->antType[i] = rf_control; - } - unblockNode->succedents[0] = termNode; - - /* The recovery node has all the reads as predecessors, and the term - * node as successors. It gets a pda as a param from each of the read - * nodes plus the raidPtr. For each failed unit is has a result pda. */ - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - 1, /* succesors */ - nReadNodes, /* preds */ - nReadNodes + 2, /* params */ - asmap->numDataFailed, /* results */ - dag_h, recoveryNodeName, allocList); - - recoveryNode->succedents[0] = termNode; - for (i = 0; i < nReadNodes; i++) { - recoveryNode->antecedents[i] = rudNodes + i; - recoveryNode->antType[i] = rf_trueData; - } - - /* build the read nodes, then come back and fill in recovery params - * and results */ - pda = asmap->physInfo; - for (i = 0; i < nRudNodes; pda = pda->next) { - if ((pda == failedPDA) || (pda == failedPDAtwo)) - continue; - INIT_DISK_NODE(rudNodes + i, "Rud"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rudNodes[i], pda); - i++; - } - - pda = npdas; - for (i = 0; i < nRrdNodes; i++, pda = pda->next) { - INIT_DISK_NODE(rrdNodes + i, "Rrd"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rrdNodes[i], pda); - } - - /* redundancy pdas */ - pda = pqPDAs; - INIT_DISK_NODE(rpNodes, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[0], pda); - pda++; - INIT_DISK_NODE(rqNodes, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[0], pda); - if (nPQNodes == 2) { - pda++; - INIT_DISK_NODE(rpNodes + 1, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[1], pda); - pda++; - INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[1], pda); - } - /* fill in recovery node params */ - for (i = 0; i < nReadNodes; i++) - recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ - recoveryNode->params[i++].p = (void *) raidPtr; - recoveryNode->params[i++].p = (void *) asmap; - recoveryNode->results[0] = failedPDA; - if (asmap->numDataFailed == 2) - recoveryNode->results[1] = failedPDAtwo; - - /* zero fill the target data buffers? */ -} - -#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */ diff --git a/sys/dev/raidframe/rf_dagdegrd.h b/sys/dev/raidframe/rf_dagdegrd.h deleted file mode 100644 index 2e899d8..0000000 --- a/sys/dev/raidframe/rf_dagdegrd.h +++ /dev/null @@ -1,64 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagdegrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DAGDEGRD_H_ -#define _RF__RF_DAGDEGRD_H_ - -#include <dev/raidframe/rf_types.h> - -/* degraded read DAG creation routines */ -void -rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateRaidOneDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * recFunc); -void -rf_CreateRaidCDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_DD_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, - int *nNodep, RF_PhysDiskAddr_t ** pqpdap, int *nPQNodep, - RF_AllocListElem_t * allocList); -void -rf_DoubleDegRead(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, char *redundantReadNodeName, - char *recoveryNodeName, int (*recovFunc) (RF_DagNode_t *)); - -#endif /* !_RF__RF_DAGDEGRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegwr.c b/sys/dev/raidframe/rf_dagdegwr.c deleted file mode 100644 index 70e0db6..0000000 --- a/sys/dev/raidframe/rf_dagdegwr.c +++ /dev/null @@ -1,846 +0,0 @@ -/* $NetBSD: rf_dagdegwr.c,v 1.6 2001/01/26 04:05:08 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagdegwr.c - * - * code for creating degraded write DAGs - * - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagdegwr.h> - - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. - * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. - * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - -static -RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG) -{ - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, - flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); -} - -void -rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; - - RF_ASSERT(asmap->numDataFailed == 1); - dag_h->creator = "DegradedWriteDAG"; - - /* if the access writes only a portion of the failed unit, and also - * writes some portion of at least one surviving unit, we create two - * DAGs, one for the failed component and one for the non-failed - * component, and do them sequentially. Note that the fact that we're - * accessing only a portion of the failed unit indicates that the - * access either starts or ends in the failed unit, and hence we need - * create only two dags. This is inefficient in that the same data or - * parity can get read and written twice using this structure. I need - * to fix this to do the access all at once. */ - RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)); - rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList); -} - - - -/****************************************************************************** - * - * DAG creation code begins here - */ - - - -/****************************************************************************** - * - * CommonCreateSimpleDegradedWriteDAG -- creates a DAG to do a degraded-mode - * write, which is as follows - * - * / {Wnq} --\ - * hdr -> blockNode -> Rod -> Xor -> Cmt -> Wnp ----> unblock -> term - * \ {Rod} / \ Wnd ---/ - * \ {Wnd} -/ - * - * commit nodes: Xor, Wnd - * - * IMPORTANT: - * This DAG generator does not work for double-degraded archs since it does not - * generate Q - * - * This dag is essentially identical to the large-write dag, except that the - * write to the failed data unit is suppressed. - * - * IMPORTANT: this dag does not work in the case where the access writes only - * a portion of the failed unit, and also writes some portion of at least one - * surviving SU. this case is handled in CreateDegradedWriteDAG above. - * - * The block & unblock nodes are leftovers from a previous version. They - * do nothing, but I haven't deleted them because it would be a tremendous - * effort to put them back in. - * - * This dag is used whenever a one of the data units in a write has failed. - * If it is the parity unit that failed, the nonredundant write dag (below) - * is used. - *****************************************************************************/ - -void -rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, nfaults, redFunc, allowBufferRecycle) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; - int nfaults; - int (*redFunc) (RF_DagNode_t *); - int allowBufferRecycle; -{ - int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, - rdnodesFaked; - RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode; - RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode; - RF_SectorCount_t sectorsPerSU; - RF_ReconUnitNum_t which_ru; - char *xorTargetBuf = NULL; /* the target buffer for the XOR - * operation */ - char *overlappingPDAs;/* a temporary array of flags */ - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_PhysDiskAddr_t *pda, *parityPDA; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *failedPDA; - RF_RaidLayout_t *layoutPtr; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, - &which_ru); - sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - /* failedPDA points to the pda within the asm that targets the failed - * disk */ - failedPDA = asmap->failedPDAs[0]; - - if (rf_dagDebug) - printf("[Creating degraded-write DAG]\n"); - - RF_ASSERT(asmap->numDataFailed == 1); - dag_h->creator = "SimpleDegradedWriteDAG"; - - /* - * Generate two ASMs identifying the surviving data - * we need in order to recover the lost data. - */ - /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, - &nXorBufs, NULL, overlappingPDAs, allocList); - - /* create all the nodes at once */ - nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is - * generated for the - * failed pda */ - - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); - /* - * XXX - * - * There's a bug with a complete stripe overwrite- that means 0 reads - * of old data, and the rest of the DAG generation code doesn't like - * that. A release is coming, and I don't wanna risk breaking a critical - * DAG generator, so here's what I'm gonna do- if there's no read nodes, - * I'm gonna fake there being a read node, and I'm gonna swap in a - * no-op node in its place (to make all the link-up code happy). - * This should be fixed at some point. --jimz - */ - if (nRrdNodes == 0) { - nRrdNodes = 1; - rdnodesFaked = 1; - } else { - rdnodesFaked = 0; - } - /* lock, unlock, xor, Wnd, Rrd, W(nfaults) */ - nNodes = 5 + nfaults + nWndNodes + nRrdNodes; - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - xorNode = &nodes[i]; - i += 1; - wnpNode = &nodes[i]; - i += 1; - wndNodes = &nodes[i]; - i += nWndNodes; - rrdNodes = &nodes[i]; - i += nRrdNodes; - if (nfaults == 2) { - wnqNode = &nodes[i]; - i += 1; - } else { - wnqNode = NULL; - } - RF_ASSERT(i == nNodes); - - /* this dag can not commit until all rrd and xor Nodes have completed */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - RF_ASSERT(nRrdNodes > 0); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", allocList); - - /* - * Fill in the Rrd nodes. If any of the rrd buffers are the same size as - * the failed buffer, save a pointer to it so we can use it as the target - * of the XOR. The pdas in the rrd nodes have been range-restricted, so if - * a buffer is the same size as the failed buffer, it must also be at the - * same alignment within the SU. - */ - i = 0; - if (new_asm_h[0]) { - for (i = 0, pda = new_asm_h[0]->stripeMap->physInfo; - i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - i++, pda = pda->next) { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i].params[0].p = pda; - rrdNodes[i].params[1].p = pda->bufPtr; - rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - /* i now equals the number of stripe units accessed in new_asm_h[0] */ - if (new_asm_h[1]) { - for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; - j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - j++, pda = pda->next) { - rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i + j].params[0].p = pda; - rrdNodes[i + j].params[1].p = pda->bufPtr; - rrdNodes[i + j].params[2].v = parityStripeID; - rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - if (allowBufferRecycle && (pda->numSector == failedPDA->numSector)) - xorTargetBuf = pda->bufPtr; - } - } - if (rdnodesFaked) { - /* - * This is where we'll init that fake noop read node - * (XXX should the wakeup func be different?) - */ - rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "RrN", allocList); - } - /* - * Make a PDA for the parity unit. The parity PDA should start at - * the same offset into the SU as the failed PDA. - */ - /* Danner comment: I don't think this copy is really necessary. We are - * in one of two cases here. (1) The entire failed unit is written. - * Then asmap->parityInfo will describe the entire parity. (2) We are - * only writing a subset of the failed unit and nothing else. Then the - * asmap->parityInfo describes the failed unit and the copy can also - * be avoided. */ - - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->parityInfo->row; - parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - if (!xorTargetBuf) { - RF_CallocAndAdd(xorTargetBuf, 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); - } - /* init the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = parityPDA; - wnpNode->params[1].p = xorTargetBuf; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - /* fill in the Wnq Node */ - if (nfaults == 2) { - { - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), - (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->qInfo->row; - parityPDA->col = asmap->qInfo->col; - parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = parityPDA; - RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - /* fill in the Wnd nodes */ - for (pda = asmap->physInfo, i = 0; i < nWndNodes; i++, pda = pda->next) { - if (pda == failedPDA) { - i--; - continue; - } - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* fill in the results of the xor node */ - xorNode->results[0] = xorTargetBuf; - - /* fill in the params of the xor node */ - - paramNum = 0; - if (rdnodesFaked == 0) { - for (i = 0; i < nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ - xorNode->params[paramNum++] = rrdNodes[i].params[0]; - xorNode->params[paramNum++] = rrdNodes[i].params[1]; - } - } - for (i = 0; i < nWndNodes; i++) { - /* any Wnd nodes that overlap the failed access need to be - * xored in */ - if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *) wndNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); - xorNode->params[paramNum++].p = pda; - xorNode->params[paramNum++].p = pda->bufPtr; - } - } - RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); - - /* - * Install the failed PDA into the xor param list so that the - * new data gets xor'd in. - */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = failedPDA->bufPtr; - - /* - * The last 2 params to the recovery xor node are always the failed - * PDA and the raidPtr. install the failedPDA even though we have just - * done so above. This allows us to use the same XOR function for both - * degraded reads and degraded writes. - */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = raidPtr; - RF_ASSERT(paramNum == 2 * nXorBufs + 2); - - /* - * Code to link nodes begins here - */ - - /* link header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* link block node to rd nodes */ - RF_ASSERT(blockNode->numSuccedents == nRrdNodes); - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rrdNodes[i]; - rrdNodes[i].antecedents[0] = blockNode; - rrdNodes[i].antType[0] = rf_control; - } - - /* link read nodes to xor node */ - RF_ASSERT(xorNode->numAntecedents == nRrdNodes); - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - rrdNodes[i].succedents[0] = xorNode; - xorNode->antecedents[i] = &rrdNodes[i]; - xorNode->antType[i] = rf_trueData; - } - - /* link xor node to commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - commitNode->antType[0] = rf_control; - - /* link commit node to wnd nodes */ - RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = commitNode; - wndNodes[i].antType[0] = rf_control; - } - - /* link the commit node to wnp, wnq nodes */ - RF_ASSERT(wnpNode->numAntecedents == 1); - commitNode->succedents[nWndNodes] = wnpNode; - wnpNode->antecedents[0] = commitNode; - wnpNode->antType[0] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - commitNode->succedents[nWndNodes + 1] = wnqNode; - wnqNode->antecedents[0] = commitNode; - wnqNode->antType[0] = rf_control; - } - /* link write new data nodes to unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes[i].numSuccedents == 1); - wndNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNodes[i]; - unblockNode->antType[i] = rf_control; - } - - /* link write new parity node to unblock node */ - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes] = wnpNode; - unblockNode->antType[nWndNodes] = rf_control; - - /* link write new q node to unblock node */ - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes + 1] = wnqNode; - unblockNode->antType[nWndNodes + 1] = rf_control; - } - /* link unblock node to term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} -#define CONS_PDA(if,start,num) \ - pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ - pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ - pda_p->numSector = num; \ - pda_p->next = NULL; \ - RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) -#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0) -void -rf_WriteGenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t ** pdap, - int *nNodep, - RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, - RF_AllocListElem_t * allocList) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk, i; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; - unsigned napdas; - RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; - RF_PhysDiskAddr_t *pda_p; - RF_RaidAddr_t sosAddr; - - /* determine how many pda's we will have to generate per unaccess - * stripe. If there is only one failed data unit, it is one; if two, - * possibly two, depending wether they overlap. */ - - fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); - fone_end = fone_start + fone->numSector; - - if (asmap->numDataFailed == 1) { - PDAPerDisk = 1; - state = 1; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - /* build p */ - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - /* build q */ - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } else { - ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); - ftwo_end = ftwo_start + ftwo->numSector; - if (fone->numSector + ftwo->numSector > secPerSU) { - PDAPerDisk = 1; - state = 2; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, 0, secPerSU); - pda_p->type = RF_PDA_TYPE_Q; - } else { - PDAPerDisk = 2; - state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, fone_start, fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - pda_p++; - CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo, ftwo_start, ftwo->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } - } - /* figure out number of nonaccessed pda */ - napdas = PDAPerDisk * (numDataCol - 2); - *nPQNodep = PDAPerDisk; - - *nNodep = napdas; - if (napdas == 0) - return; /* short circuit */ - - /* allocate up our list of pda's */ - - RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - *pdap = pda_p; - - /* linkem together */ - for (i = 0; i < (napdas - 1); i++) - pda_p[i].next = pda_p + (i + 1); - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < numDataCol; i++) { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); - } - pda_p++; - } - - RF_ASSERT(pda_p - *pdap == napdas); - return; -} -#define DISK_NODE_PDA(node) ((node)->params[0].p) - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -void -rf_DoubleDegSmallWrite( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - char *redundantReadNodeName, - char *redundantWriteNodeName, - char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode, - *unblockNode, *rpNodes, *rqNodes, *wpNodes, *wqNodes, *termNode; - RF_PhysDiskAddr_t *pda, *pqPDAs; - RF_PhysDiskAddr_t *npdas; - int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i; - RF_ReconUnitNum_t which_ru; - int nPQNodes; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); - - /* simple small write case - First part looks like a reconstruct-read - * of the failed data units. Then a write of all data units not - * failed. */ - - - /* Hdr | ------Block- / / \ Rrd Rrd ... Rrd Rp Rq \ \ - * / -------PQ----- / \ \ Wud Wp WQ \ | / - * --Unblock- | T - * - * Rrd = read recovery data (potentially none) Wud = write user data - * (not incl. failed disks) Wp = Write P (could be two) Wq = Write Q - * (could be two) - * - */ - - rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); - - RF_ASSERT(asmap->numDataFailed == 1); - - nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); - nReadNodes = nRrdNodes + 2 * nPQNodes; - nWriteNodes = nWudNodes + 2 * nPQNodes; - nNodes = 4 + nReadNodes + nWriteNodes; - - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - blockNode = nodes; - unblockNode = blockNode + 1; - termNode = unblockNode + 1; - recoveryNode = termNode + 1; - rrdNodes = recoveryNode + 1; - rpNodes = rrdNodes + nRrdNodes; - rqNodes = rpNodes + nPQNodes; - wudNodes = rqNodes + nPQNodes; - wpNodes = wudNodes + nWudNodes; - wqNodes = wpNodes + nPQNodes; - - dag_h->creator = "PQ_DDSimpleSmallWrite"; - dag_h->numSuccedents = 1; - dag_h->succedents[0] = blockNode; - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - - /* init the block and unblock nodes */ - /* The block node has all the read nodes as successors */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); - for (i = 0; i < nReadNodes; i++) - blockNode->succedents[i] = rrdNodes + i; - - /* The unblock node has all the writes as successors */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList); - for (i = 0; i < nWriteNodes; i++) { - unblockNode->antecedents[i] = wudNodes + i; - unblockNode->antType[i] = rf_control; - } - unblockNode->succedents[0] = termNode; - -#define INIT_READ_NODE(node,name) \ - rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ - (node)->succedents[0] = recoveryNode; \ - (node)->antecedents[0] = blockNode; \ - (node)->antType[0] = rf_control; - - /* build the read nodes */ - pda = npdas; - for (i = 0; i < nRrdNodes; i++, pda = pda->next) { - INIT_READ_NODE(rrdNodes + i, "rrd"); - DISK_NODE_PARAMS(rrdNodes[i], pda); - } - - /* read redundancy pdas */ - pda = pqPDAs; - INIT_READ_NODE(rpNodes, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[0], pda); - pda++; - INIT_READ_NODE(rqNodes, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[0], pda); - if (nPQNodes == 2) { - pda++; - INIT_READ_NODE(rpNodes + 1, "Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[1], pda); - pda++; - INIT_READ_NODE(rqNodes + 1, redundantReadNodeName); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[1], pda); - } - /* the recovery node has all reads as precedessors and all writes as - * successors. It generates a result for every write P or write Q - * node. As parameters, it takes a pda per read and a pda per stripe - * of user data written. It also takes as the last params the raidPtr - * and asm. For results, it takes PDA for P & Q. */ - - - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - nWriteNodes, /* succesors */ - nReadNodes, /* preds */ - nReadNodes + nWudNodes + 3, /* params */ - 2 * nPQNodes, /* results */ - dag_h, recoveryNodeName, allocList); - - - - for (i = 0; i < nReadNodes; i++) { - recoveryNode->antecedents[i] = rrdNodes + i; - recoveryNode->antType[i] = rf_control; - recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes + i); - } - for (i = 0; i < nWudNodes; i++) { - recoveryNode->succedents[i] = wudNodes + i; - } - recoveryNode->params[nReadNodes + nWudNodes].p = asmap->failedPDAs[0]; - recoveryNode->params[nReadNodes + nWudNodes + 1].p = raidPtr; - recoveryNode->params[nReadNodes + nWudNodes + 2].p = asmap; - - for (; i < nWriteNodes; i++) - recoveryNode->succedents[i] = wudNodes + i; - - pda = pqPDAs; - recoveryNode->results[0] = pda; - pda++; - recoveryNode->results[1] = pda; - if (nPQNodes == 2) { - pda++; - recoveryNode->results[2] = pda; - pda++; - recoveryNode->results[3] = pda; - } - /* fill writes */ -#define INIT_WRITE_NODE(node,name) \ - rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ - (node)->succedents[0] = unblockNode; \ - (node)->antecedents[0] = recoveryNode; \ - (node)->antType[0] = rf_control; - - pda = asmap->physInfo; - for (i = 0; i < nWudNodes; i++) { - INIT_WRITE_NODE(wudNodes + i, "Wd"); - DISK_NODE_PARAMS(wudNodes[i], pda); - recoveryNode->params[nReadNodes + i].p = DISK_NODE_PDA(wudNodes + i); - pda = pda->next; - } - /* write redundancy pdas */ - pda = pqPDAs; - INIT_WRITE_NODE(wpNodes, "Wp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wpNodes[0], pda); - pda++; - INIT_WRITE_NODE(wqNodes, "Wq"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wqNodes[0], pda); - if (nPQNodes == 2) { - pda++; - INIT_WRITE_NODE(wpNodes + 1, "Wp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wpNodes[1], pda); - pda++; - INIT_WRITE_NODE(wqNodes + 1, "Wq"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wqNodes[1], pda); - } -} -#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0) */ diff --git a/sys/dev/raidframe/rf_dagdegwr.h b/sys/dev/raidframe/rf_dagdegwr.h deleted file mode 100644 index 1e4b5e2..0000000 --- a/sys/dev/raidframe/rf_dagdegwr.h +++ /dev/null @@ -1,55 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagdegwr.h,v 1.4 1999/08/15 02:36:03 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - - -#ifndef _RF__RF_DAGDEGWR_H_ -#define _RF__RF_DAGDEGWR_H_ - -/* degraded write DAG creation routines */ -void rf_CreateDegradedWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); - -void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - -void rf_WriteGenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, - int *nNodep, RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, RF_AllocListElem_t * allocList); - -void rf_DoubleDegSmallWrite(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, char *redundantReadNodeName, - char *redundantWriteNodeName, char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)); - -#endif /* !_RF__RF_DAGDEGWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagffrd.c b/sys/dev/raidframe/rf_dagffrd.c deleted file mode 100644 index 13c0af7..0000000 --- a/sys/dev/raidframe/rf_dagffrd.c +++ /dev/null @@ -1,441 +0,0 @@ -/* $NetBSD: rf_dagffrd.c,v 1.4 2000/01/07 03:40:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagffrd.c - * - * code for creating fault-free read DAGs - * - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagffrd.h> - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. - * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. - * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - -void -rf_CreateFaultFreeReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_READ); -} - - -/****************************************************************************** - * - * DAG creation code begins here - */ - -/****************************************************************************** - * - * creates a DAG to perform a nonredundant read or write of data within one - * stripe. - * For reads, this DAG is as follows: - * - * /---- read ----\ - * Header -- Block ---- read ---- Commit -- Terminate - * \---- read ----/ - * - * For writes, this DAG is as follows: - * - * /---- write ----\ - * Header -- Commit ---- write ---- Block -- Terminate - * \---- write ----/ - * - * There is one disk node per stripe unit accessed, and all disk nodes are in - * parallel. - * - * Tricky point here: The first disk node (read or write) is created - * normally. Subsequent disk nodes are created by copying the first one, - * and modifying a few params. The "succedents" and "antecedents" fields are - * _not_ re-created in each node, but rather left pointing to the same array - * that was malloc'd when the first node was created. Thus, it's essential - * that when this DAG is freed, the succedents and antecedents fields be freed - * in ONLY ONE of the read nodes. This does not apply to the "params" field - * because it is recreated for each READ node. - * - * Note that normal-priority accesses do not need to be tagged with their - * parity stripe ID, because they will never be promoted. Hence, I've - * commented-out the code to do this, and marked it with UNNEEDED. - * - *****************************************************************************/ - -void -rf_CreateNonredundantDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) -{ - RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode; - RF_PhysDiskAddr_t *pda = asmap->physInfo; - int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int i, n, totalNumNodes; - char *name; - - n = asmap->numStripeUnitsAccessed; - dag_h->creator = "NonredundantDAG"; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - switch (type) { - case RF_IO_TYPE_READ: - doFunc = rf_DiskReadFunc; - undoFunc = rf_DiskReadUndoFunc; - name = "R "; - if (rf_dagDebug) - printf("[Creating non-redundant read DAG]\n"); - break; - case RF_IO_TYPE_WRITE: - doFunc = rf_DiskWriteFunc; - undoFunc = rf_DiskWriteUndoFunc; - name = "W "; - if (rf_dagDebug) - printf("[Creating non-redundant write DAG]\n"); - break; - default: - RF_PANIC(); - } - - /* - * For reads, the dag can not commit until the block node is reached. - * for writes, the dag commits immediately. - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * Node count: - * 1 block node - * n data reads (or writes) - * 1 commit node - * 1 terminator node - */ - RF_ASSERT(n > 0); - totalNumNodes = n + 3; - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - diskNodes = &nodes[i]; - i += n; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == totalNumNodes); - - /* initialize nodes */ - switch (type) { - case RF_IO_TYPE_READ: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - break; - case RF_IO_TYPE_WRITE: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, n, 0, 0, dag_h, "Trm", allocList); - break; - default: - RF_PANIC(); - } - - for (i = 0; i < n; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, - 1, 1, 4, 0, dag_h, name, allocList); - diskNodes[i].params[0].p = pda; - diskNodes[i].params[1].p = pda->bufPtr; - /* parity stripe id is not necessary */ - diskNodes[i].params[2].v = 0; - diskNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - pda = pda->next; - } - - /* - * Connect nodes. - */ - - /* connect hdr to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (type == RF_IO_TYPE_READ) { - /* connecting a nonredundant read DAG */ - RF_ASSERT(blockNode->numSuccedents == n); - RF_ASSERT(commitNode->numAntecedents == n); - for (i = 0; i < n; i++) { - /* connect block node to each read node */ - RF_ASSERT(diskNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &diskNodes[i]; - diskNodes[i].antecedents[0] = blockNode; - diskNodes[i].antType[0] = rf_control; - - /* connect each read node to the commit node */ - RF_ASSERT(diskNodes[i].numSuccedents == 1); - diskNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &diskNodes[i]; - commitNode->antType[i] = rf_control; - } - /* connect the commit node to the term node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; - } else { - /* connecting a nonredundant write DAG */ - /* connect the block node to the commit node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - blockNode->succedents[0] = commitNode; - commitNode->antecedents[0] = blockNode; - commitNode->antType[0] = rf_control; - - RF_ASSERT(commitNode->numSuccedents == n); - RF_ASSERT(termNode->numAntecedents == n); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < n; i++) { - /* connect the commit node to each write node */ - RF_ASSERT(diskNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &diskNodes[i]; - diskNodes[i].antecedents[0] = commitNode; - diskNodes[i].antType[0] = rf_control; - - /* connect each write node to the term node */ - RF_ASSERT(diskNodes[i].numSuccedents == 1); - diskNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &diskNodes[i]; - termNode->antType[i] = rf_control; - } - } -} -/****************************************************************************** - * Create a fault-free read DAG for RAID level 1 - * - * Hdr -> Nil -> Rmir -> Cmt -> Trm - * - * The "Rmir" node schedules a read from the disk in the mirror pair with the - * shortest disk queue. the proper queue is selected at Rmir execution. this - * deferred mapping is unlike other archs in RAIDframe which generally fix - * mapping at DAG creation time. - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding read data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -static void -CreateMirrorReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int (*readfunc) (RF_DagNode_t * node)) -{ - RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode; - RF_PhysDiskAddr_t *data_pda = asmap->physInfo; - RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo; - int i, n, totalNumNodes; - - n = asmap->numStripeUnitsAccessed; - dag_h->creator = "RaidOneReadDAG"; - if (rf_dagDebug) { - printf("[Creating RAID level 1 read DAG]\n"); - } - /* - * This dag can not commit until the commit node is reached - * errors prior to the commit point imply the dag has failed. - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * Node count: - * n data reads - * 1 block node - * 1 commit node - * 1 terminator node - */ - RF_ASSERT(n > 0); - totalNumNodes = n + 3; - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - readNodes = &nodes[i]; - i += n; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == totalNumNodes); - - /* initialize nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, - rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, - rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, - rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - for (i = 0; i < n; i++) { - RF_ASSERT(data_pda != NULL); - RF_ASSERT(parity_pda != NULL); - rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc, - rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h, - "Rmir", allocList); - readNodes[i].params[0].p = data_pda; - readNodes[i].params[1].p = data_pda->bufPtr; - /* parity stripe id is not necessary */ - readNodes[i].params[2].p = 0; - readNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - readNodes[i].params[4].p = parity_pda; - data_pda = data_pda->next; - parity_pda = parity_pda->next; - } - - /* - * Connect nodes - */ - - /* connect hdr to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to read nodes */ - RF_ASSERT(blockNode->numSuccedents == n); - for (i = 0; i < n; i++) { - RF_ASSERT(readNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &readNodes[i]; - readNodes[i].antecedents[0] = blockNode; - readNodes[i].antType[0] = rf_control; - } - - /* connect read nodes to commit node */ - RF_ASSERT(commitNode->numAntecedents == n); - for (i = 0; i < n; i++) { - RF_ASSERT(readNodes[i].numSuccedents == 1); - readNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &readNodes[i]; - commitNode->antType[i] = rf_control; - } - - /* connect commit node to term node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; -} - -void -rf_CreateMirrorIdleReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - rf_DiskReadMirrorIdleFunc); -} - -void -rf_CreateMirrorPartitionReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - rf_DiskReadMirrorPartitionFunc); -} diff --git a/sys/dev/raidframe/rf_dagffrd.h b/sys/dev/raidframe/rf_dagffrd.h deleted file mode 100644 index 6862a8d..0000000 --- a/sys/dev/raidframe/rf_dagffrd.h +++ /dev/null @@ -1,53 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagffrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DAGFFRD_H_ -#define _RF__RF_DAGFFRD_H_ - -#include <dev/raidframe/rf_types.h> - -/* fault-free read DAG creation routines */ -void -rf_CreateFaultFreeReadDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CreateNonredundantDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, RF_IoType_t type); -void -rf_CreateMirrorIdleReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateMirrorPartitionReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); - -#endif /* !_RF__RF_DAGFFRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagffwr.c b/sys/dev/raidframe/rf_dagffwr.c deleted file mode 100644 index 9216b29..0000000 --- a/sys/dev/raidframe/rf_dagffwr.c +++ /dev/null @@ -1,2131 +0,0 @@ -/* $NetBSD: rf_dagffwr.c,v 1.5 2000/01/07 03:40:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_dagff.c - * - * code for creating fault-free DAGs - * - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagffwr.h> - -/****************************************************************************** - * - * General comments on DAG creation: - * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node - * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that - * each node in the graph prior to the Cmt node are undoable and atomic - or - - * does not make changes to permanent state, the graph will fail atomically. - * If an error occurs after the Cmt node executes, the engine will roll-forward - * through the graph, blindly executing nodes until it reaches the end. - * If a graph reaches the end, it is assumed to have completed successfully. - * - * A graph has only 1 Cmt node. - * - */ - - -/****************************************************************************** - * - * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation - * with new DAG structures by providing an extra level of indirection, allowing - * the DAG creation routines to be replaced at this single point. - */ - - -void -rf_CreateNonRedundantWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) -{ - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} - -void -rf_CreateRAID0WriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) -{ - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} - -void -rf_CreateSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - /* "normal" rollaway */ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorFuncs, NULL); -} - -void -rf_CreateLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - /* "normal" rollaway */ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - 1, rf_RegularXorFunc, RF_TRUE); -} - - -/****************************************************************************** - * - * DAG creation code begins here - */ - - -/****************************************************************************** - * - * creates a DAG to perform a large-write operation: - * - * / Rod \ / Wnd \ - * H -- block- Rod - Xor - Cmt - Wnd --- T - * \ Rod / \ Wnp / - * \[Wnq]/ - * - * The XOR node also does the Q calculation in the P+Q architecture. - * All nodes are before the commit node (Cmt) are assumed to be atomic and - * undoable - or - they make no changes to permanent state. - * - * Rod = read old data - * Cmt = commit node - * Wnp = write new parity - * Wnd = write new data - * Wnq = write new "q" - * [] denotes optional segments in the graph - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * nfaults - number of faults array can tolerate - * (equal to # redundancy units in stripe) - * redfuncs - list of redundancy generating functions - * - *****************************************************************************/ - -void -rf_CommonCreateLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *), - int allowBufferRecycle) -{ - RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; - RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_PhysDiskAddr_t *pda; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, - &which_ru); - - if (rf_dagDebug) { - printf("[Creating large-write DAG]\n"); - } - dag_h->creator = "LargeWriteDAG"; - - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; - i += nWndNodes; - xorNode = &nodes[i]; - i += 1; - wnpNode = &nodes[i]; - i += 1; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - if (nfaults == 2) { - wnqNode = &nodes[i]; - i += 1; - } else { - wnqNode = NULL; - } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, - &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) { - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - } else { - rodNodes = NULL; - } - - /* begin node initialization */ - if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); - } else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - } - - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, - nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, - 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - nodeNum++; - pda = pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - if (nRodNodes > 0) { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRodNodes, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, - "Xr ", allocList); - } else { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - 1, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); - } - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ - } - /* xor node needs to get at RAID information */ - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; - - /* - * Look for an Rod node that reads a complete SU. If none, alloc a buffer - * to receive the parity info. Note that we can't use a new data buffer - * because it will not have gotten written when the xor occurs. - */ - if (allowBufferRecycle) { - for (i = 0; i < nRodNodes; i++) { - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - } - } - if ((!allowBufferRecycle) || (i == nRodNodes)) { - RF_CallocAndAdd(xorNode->results[0], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), - (void *), allocList); - } else { - xorNode->results[0] = rodNodes[i].params[1].p; - } - - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = asmap->parityInfo; - wnpNode->params[1].p = xorNode->results[0]; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - if (nfaults == 2) { - /* - * We never try to recycle a buffer for the Q calcuation - * in addition to the parity. This would cause two buffers - * to get smashed during the P and Q calculation, guaranteeing - * one would be wrong. - */ - RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), - (void *), allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = asmap->qInfo; - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - } - /* - * Connect nodes to form graph. - */ - - /* connect dag header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes); - RF_ASSERT(xorNode->numAntecedents == nRodNodes); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - - /* connect the Rod nodes to the Xor node */ - RF_ASSERT(rodNodes[i].numSuccedents == 1); - rodNodes[i].succedents[0] = xorNode; - xorNode->antecedents[i] = &rodNodes[i]; - xorNode->antType[i] = rf_trueData; - } - } else { - /* connect the block node to the Xor node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(xorNode->numAntecedents == 1); - blockNode->succedents[0] = xorNode; - xorNode->antecedents[0] = blockNode; - xorNode->antType[0] = rf_control; - } - - /* connect the xor node to the commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - commitNode->antType[0] = rf_control; - - /* connect the commit node to the write nodes */ - RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - commitNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = commitNode; - wndNodes[i].antType[0] = rf_control; - } - RF_ASSERT(wnpNode->numAntecedents == 1); - commitNode->succedents[nWndNodes] = wnpNode; - wnpNode->antecedents[0] = commitNode; - wnpNode->antType[0] = rf_trueData; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - commitNode->succedents[nWndNodes + 1] = wnqNode; - wnqNode->antecedents[0] = commitNode; - wnqNode->antType[0] = rf_trueData; - } - /* connect the write nodes to the term node */ - RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &wndNodes[i]; - termNode->antType[i] = rf_control; - } - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes] = wnpNode; - termNode->antType[nWndNodes] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes + 1] = wnqNode; - termNode->antType[nWndNodes + 1] = rf_control; - } -} -/****************************************************************************** - * - * creates a DAG to perform a small-write operation (either raid 5 or pq), - * which is as follows: - * - * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm - * \- Rod X / \----> Wnd [Und]-/ - * [\- Rod X / \---> Wnd [Und]-/] - * [\- Roq -> Q / \--> Wnq [Unq]-/] - * - * Rop = read old parity - * Rod = read old data - * Roq = read old "q" - * Cmt = commit node - * Und = unlock data disk - * Unp = unlock parity disk - * Unq = unlock q disk - * Wnp = write new parity - * Wnd = write new data - * Wnq = write new "q" - * [ ] denotes optional segments in the graph - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * pfuncs - list of parity generating functions - * qfuncs - list of q generating functions - * - * A null qfuncs indicates single fault tolerant - *****************************************************************************/ - -void -rf_CommonCreateSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; - RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; - RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode, *nodes; - RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; - RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int (*qfunc) (RF_DagNode_t *); - int numDataNodes, numParityNodes; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; - - nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - pda = asmap->physInfo; - numDataNodes = asmap->numStripeUnitsAccessed; - numParityNodes = (asmap->parityInfo->next) ? 2 : 1; - - if (rf_dagDebug) { - printf("[Creating small-write DAG]\n"); - } - RF_ASSERT(numDataNodes > 0); - dag_h->creator = "SmallWriteDAG"; - - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * DAG creation occurs in four steps: - * 1. count the number of nodes in the DAG - * 2. create the nodes - * 3. initialize the nodes - * 4. connect the nodes - */ - - /* - * Step 1. compute number of nodes in the graph - */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node (nfaults * nparity) a read - * and write for each parity unit a block and commit node (2) a - * terminate node if atomic RMW an unlock node for each data unit, - * redundancy unit */ - totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) - + (nfaults * 2 * numParityNodes) + 3; - if (lu_flag) { - totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); - } - /* - * Step 2. create the nodes - */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - commitNode = &nodes[i]; - i += 1; - readDataNodes = &nodes[i]; - i += numDataNodes; - readParityNodes = &nodes[i]; - i += numParityNodes; - writeDataNodes = &nodes[i]; - i += numDataNodes; - writeParityNodes = &nodes[i]; - i += numParityNodes; - xorNodes = &nodes[i]; - i += numParityNodes; - termNode = &nodes[i]; - i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; - i += numDataNodes; - unlockParityNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockDataNodes = unlockParityNodes = NULL; - } - if (nfaults == 2) { - readQNodes = &nodes[i]; - i += numParityNodes; - writeQNodes = &nodes[i]; - i += numParityNodes; - qNodes = &nodes[i]; - i += numParityNodes; - if (lu_flag) { - unlockQNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockQNodes = NULL; - } - } else { - readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; - } - RF_ASSERT(i == totalNumNodes); - - /* - * Step 3. initialize the nodes - */ - /* initialize block node (Nil) */ - nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize commit node (Cmt) */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList); - - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, (nfaults * numParityNodes), 1, 4, 0, dag_h, - "Rod", allocList); - RF_ASSERT(pda != NULL); - /* physical disk addr desc */ - readDataNodes[i].params[0].p = pda; - /* buffer to hold old data */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, - dag_h, pda, allocList); - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readDataNodes[i].numSuccedents; j++) { - readDataNodes[i].propList[j] = NULL; - } - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; - i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, - 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - /* buffer to hold old parity */ - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, - dag_h, pda, allocList); - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readParityNodes[i].numSuccedents; j++) { - readParityNodes[i].propList[0] = NULL; - } - } - - /* initialize nodes which read old Q (Roq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); - readQNodes[i].params[0].p = pda; - /* buffer to hold old Q */ - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, - allocList); - readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readQNodes[i].numSuccedents; j++) { - readQNodes[i].propList[0] = NULL; - } - } - } - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnd", allocList); - /* physical disk addr desc */ - writeDataNodes[i].params[0].p = pda; - /* buffer holding new data to be written */ - writeDataNodes[i].params[1].p = pda->bufPtr; - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Und", allocList); - /* physical disk addr desc */ - unlockDataNodes[i].params[0].p = pda; - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* - * Initialize nodes which compute new parity and Q. - */ - /* - * We use the simple XOR func in the double-XOR case, and when - * we're accessing only a portion of one stripe unit. The distinction - * between the two is that the regular XOR func assumes that the targbuf - * is a full SU in size, and examines the pda associated with the buffer - * to decide where within the buffer to XOR the data, whereas - * the simple XOR func just XORs the data into the start of the buffer. - */ - if ((numParityNodes == 2) || ((numDataNodes == 1) - && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } else { - qfunc = NULL; - qname = NULL; - } - } else { - func = pfuncs->regular; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->RegularName; - if (qfuncs) { - qfunc = qfuncs->regular; - qname = qfuncs->RegularName; - } else { - qfunc = NULL; - qname = NULL; - } - } - /* - * Initialize the xor nodes: params are {pda,buf} - * from {Rod,Wnd,Rop} nodes, and raidPtr - */ - if (numParityNodes == 2) { - /* double-xor case */ - for (i = 0; i < numParityNodes; i++) { - /* note: no wakeup func for xor */ - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, - 1, (numDataNodes + numParityNodes), 7, 1, dag_h, name, allocList); - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - /* use old parity buf as target buf */ - xorNodes[i].results[0] = readParityNodes[i].params[1].p; - if (nfaults == 2) { - /* note: no wakeup func for qor */ - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), 7, 1, dag_h, qname, allocList); - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; - qNodes[i].params[2] = readQNodes[i].params[0]; - qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; - qNodes[i].params[6].p = raidPtr; - /* use old Q buf as target buf */ - qNodes[i].results[0] = readQNodes[i].params[1].p; - } - } - } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ - } - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ - writeDataNodes[i].params[0]; - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ - writeDataNodes[i].params[1]; - } - /* xor node needs to get at RAID information */ - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - if (nfaults == 2) { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, - qname, allocList); - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ - } - /* and read old q */ - qNodes[0].params[2 * numDataNodes + 0] = /* pda */ - readQNodes[0].params[0]; - qNodes[0].params[2 * numDataNodes + 1] = /* buffer ptr */ - readQNodes[0].params[1]; - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd nodes */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ - writeDataNodes[i].params[0]; - qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ - writeDataNodes[i].params[1]; - } - /* xor node needs to get at RAID information */ - qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; - qNodes[0].results[0] = readQNodes[0].params[1].p; - } - } - - /* initialize nodes which write new parity (Wnp) */ - pda = asmap->parityInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnp", allocList); - RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* initialize nodes which write new Q (Wnq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnq", allocList); - RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeQNodes[i].params[2].v = parityStripeID; - writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - } - /* - * Step 4. connect the nodes. - */ - - /* connect header to block node */ - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0] = blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old Q nodes */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; - RF_ASSERT(readQNodes[i].numAntecedents == 1); - readQNodes[i].antecedents[0] = blockNode; - readQNodes[i].antType[0] = rf_control; - } - } - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == (nfaults * numParityNodes)); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - } - - /* connect read old data nodes to q nodes */ - if (nfaults == 2) { - for (i = 0; i < numDataNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numParityNodes + j] = &qNodes[j]; - qNodes[j].antecedents[i] = &readDataNodes[i]; - qNodes[j].antType[i] = rf_trueData; - } - } - } - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect read old q nodes to q nodes */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - } - /* connect xor nodes to commit node */ - RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes)); - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(xorNodes[i].numSuccedents == 1); - xorNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &xorNodes[i]; - commitNode->antType[i] = rf_control; - } - - /* connect q nodes to commit node */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(qNodes[i].numSuccedents == 1); - qNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i + numParityNodes] = &qNodes[i]; - commitNode->antType[i + numParityNodes] = rf_control; - } - } - /* connect commit node to write nodes */ - RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes))); - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(writeDataNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &writeDataNodes[i]; - writeDataNodes[i].antecedents[0] = commitNode; - writeDataNodes[i].antType[0] = rf_trueData; - } - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeParityNodes[i].numAntecedents == 1); - commitNode->succedents[i + numDataNodes] = &writeParityNodes[i]; - writeParityNodes[i].antecedents[0] = commitNode; - writeParityNodes[i].antType[0] = rf_trueData; - } - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeQNodes[i].numAntecedents == 1); - commitNode->succedents[i + numDataNodes + numParityNodes] = &writeQNodes[i]; - writeQNodes[i].antecedents[0] = commitNode; - writeQNodes[i].antType[0] = rf_trueData; - } - } - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - unlockDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &unlockDataNodes[i]; - termNode->antType[i] = rf_control; - } else { - /* connect write new data nodes to term node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &writeDataNodes[i]; - termNode->antType[i] = rf_control; - } - } - - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; - unlockParityNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); - unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } else { - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - } - - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - RF_ASSERT(unlockQNodes[i].numAntecedents == 1); - writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; - unlockQNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockQNodes[i].numSuccedents == 1); - unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } else { - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - } - } -} - - -/****************************************************************************** - * create a write graph (fault-free or degraded) for RAID level 1 - * - * Hdr -> Commit -> Wpd -> Nil -> Trm - * -> Wsd -> - * - * The "Wpd" node writes data to the primary copy in the mirror pair - * The "Wsd" node writes data to the secondary copy in the mirror pair - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidOneWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *unblockNode, *termNode, *commitNode; - RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda, *pdaP; - RF_StripeNum_t parityStripeID; - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 write DAG]\n"); - } - dag_h->creator = "RaidOneWriteDAG"; - - /* 2 implies access not SU aligned */ - nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; - nWndNodes = (asmap->physInfo->next) ? 2 : 1; - - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 1) - nWndNodes--; - if (asmap->numParityFailed == 1) - nWmirNodes--; - - /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock - * + terminator) */ - RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - wndNode = &nodes[i]; - i += nWndNodes; - wmirNode = &nodes[i]; - i += nWmirNodes; - commitNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - - /* this dag can commit immediately */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the commit, unblock, and term nodes */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ - if (nWndNodes > 0) { - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); - RF_ASSERT(pda != NULL); - wndNode[i].params[0].p = pda; - wndNode[i].params[1].p = pda->bufPtr; - wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - RF_ASSERT(pda == NULL); - } - /* initialize the mirror nodes */ - if (nWmirNodes > 0) { - pda = asmap->physInfo; - pdaP = asmap->parityInfo; - for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); - RF_ASSERT(pda != NULL); - wmirNode[i].params[0].p = pdaP; - wmirNode[i].params[1].p = pda->bufPtr; - wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - pdaP = pdaP->next; - } - RF_ASSERT(pda == NULL); - RF_ASSERT(pdaP == NULL); - } - /* link the header node to the commit node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 0); - dag_h->succedents[0] = commitNode; - - /* link the commit node to the write nodes */ - RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numAntecedents == 1); - commitNode->succedents[i] = &wndNode[i]; - wndNode[i].antecedents[0] = commitNode; - wndNode[i].antType[0] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numAntecedents == 1); - commitNode->succedents[i + nWndNodes] = &wmirNode[i]; - wmirNode[i].antecedents[0] = commitNode; - wmirNode[i].antType[0] = rf_control; - } - - /* link the write nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numSuccedents == 1); - wndNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNode[i]; - unblockNode->antType[i] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numSuccedents == 1); - wmirNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; - unblockNode->antType[i + nWndNodes] = rf_control; - } - - /* link the unblock node to the term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} - - - -/* DAGs which have no commit points. - * - * The following DAGs are used in forward and backward error recovery experiments. - * They are identical to the DAGs above this comment with the exception that the - * the commit points have been removed. - */ - - - -void -rf_CommonCreateLargeWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *), - int allowBufferRecycle) -{ - RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; - RF_DagNode_t *wnqNode, *blockNode, *syncNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_PhysDiskAddr_t *pda; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating large-write DAG]\n"); - dag_h->creator = "LargeWriteDAGFwd"; - - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; - i += nWndNodes; - xorNode = &nodes[i]; - i += 1; - wnpNode = &nodes[i]; - i += 1; - blockNode = &nodes[i]; - i += 1; - syncNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - if (nfaults == 2) { - wnqNode = &nodes[i]; - i += 1; - } else { - wnqNode = NULL; - } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) { - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - } else { - rodNodes = NULL; - } - - /* begin node initialization */ - if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes, 0, 0, dag_h, "Nil", allocList); - } else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, 1, 0, 0, dag_h, "Nil", allocList); - } - - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - nodeNum++; - pda = pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, nfaults, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ - } - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get - * at RAID information */ - - /* look for an Rod node that reads a complete SU. If none, alloc a - * buffer to receive the parity info. Note that we can't use a new - * data buffer because it will not have gotten written when the xor - * occurs. */ - if (allowBufferRecycle) { - for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - } - if ((!allowBufferRecycle) || (i == nRodNodes)) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - } else - xorNode->results[0] = rodNodes[i].params[1].p; - - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = asmap->parityInfo; - wnpNode->params[1].p = xorNode->results[0]; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ - - if (nfaults == 2) { - /* we never try to recycle a buffer for the Q calcuation in - * addition to the parity. This would cause two buffers to get - * smashed during the P and Q calculation, guaranteeing one - * would be wrong. */ - RF_CallocAndAdd(xorNode->results[1], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = asmap->qInfo; - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ - } - /* connect nodes to form graph */ - - /* connect dag header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes); - RF_ASSERT(syncNode->numAntecedents == nRodNodes); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - - /* connect the Rod nodes to the Nil node */ - RF_ASSERT(rodNodes[i].numSuccedents == 1); - rodNodes[i].succedents[0] = syncNode; - syncNode->antecedents[i] = &rodNodes[i]; - syncNode->antType[i] = rf_trueData; - } - } else { - /* connect the block node to the Nil node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(syncNode->numAntecedents == 1); - blockNode->succedents[0] = syncNode; - syncNode->antecedents[0] = blockNode; - syncNode->antType[0] = rf_control; - } - - /* connect the sync node to the Wnd nodes */ - RF_ASSERT(syncNode->numSuccedents == (1 + nWndNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - syncNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = syncNode; - wndNodes[i].antType[0] = rf_control; - } - - /* connect the sync node to the Xor node */ - RF_ASSERT(xorNode->numAntecedents == 1); - syncNode->succedents[nWndNodes] = xorNode; - xorNode->antecedents[0] = syncNode; - xorNode->antType[0] = rf_control; - - /* connect the xor node to the write parity node */ - RF_ASSERT(xorNode->numSuccedents == nfaults); - RF_ASSERT(wnpNode->numAntecedents == 1); - xorNode->succedents[0] = wnpNode; - wnpNode->antecedents[0] = xorNode; - wnpNode->antType[0] = rf_trueData; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - xorNode->succedents[1] = wnqNode; - wnqNode->antecedents[0] = xorNode; - wnqNode->antType[0] = rf_trueData; - } - /* connect the write nodes to the term node */ - RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &wndNodes[i]; - termNode->antType[i] = rf_control; - } - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes] = wnpNode; - termNode->antType[nWndNodes] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes + 1] = wnqNode; - termNode->antType[nWndNodes + 1] = rf_control; - } -} - - -/****************************************************************************** - * - * creates a DAG to perform a small-write operation (either raid 5 or pq), - * which is as follows: - * - * Hdr -> Nil -> Rop - Xor - Wnp [Unp] -- Trm - * \- Rod X- Wnd [Und] -------/ - * [\- Rod X- Wnd [Und] ------/] - * [\- Roq - Q --> Wnq [Unq]-/] - * - * Rop = read old parity - * Rod = read old data - * Roq = read old "q" - * Cmt = commit node - * Und = unlock data disk - * Unp = unlock parity disk - * Unq = unlock q disk - * Wnp = write new parity - * Wnd = write new data - * Wnq = write new "q" - * [ ] denotes optional segments in the graph - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * pfuncs - list of parity generating functions - * qfuncs - list of q generating functions - * - * A null qfuncs indicates single fault tolerant - *****************************************************************************/ - -void -rf_CommonCreateSmallWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; - RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; - RF_DagNode_t *xorNodes, *qNodes, *blockNode, *nodes; - RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; - RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int (*qfunc) (RF_DagNode_t *); - int numDataNodes, numParityNodes; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; - - nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - pda = asmap->physInfo; - numDataNodes = asmap->numStripeUnitsAccessed; - numParityNodes = (asmap->parityInfo->next) ? 2 : 1; - - if (rf_dagDebug) - printf("[Creating small-write DAG]\n"); - RF_ASSERT(numDataNodes > 0); - dag_h->creator = "SmallWriteDAGFwd"; - - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - qfunc = NULL; - qname = NULL; - - /* DAG creation occurs in four steps: 1. count the number of nodes in - * the DAG 2. create the nodes 3. initialize the nodes 4. connect the - * nodes */ - - /* Step 1. compute number of nodes in the graph */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node (nfaults * nparity) a read - * and write for each parity unit a block node a terminate node if - * atomic RMW an unlock node for each data unit, redundancy unit */ - totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + (nfaults * 2 * numParityNodes) + 2; - if (lu_flag) - totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); - - - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - readDataNodes = &nodes[i]; - i += numDataNodes; - readParityNodes = &nodes[i]; - i += numParityNodes; - writeDataNodes = &nodes[i]; - i += numDataNodes; - writeParityNodes = &nodes[i]; - i += numParityNodes; - xorNodes = &nodes[i]; - i += numParityNodes; - termNode = &nodes[i]; - i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; - i += numDataNodes; - unlockParityNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockDataNodes = unlockParityNodes = NULL; - } - if (nfaults == 2) { - readQNodes = &nodes[i]; - i += numParityNodes; - writeQNodes = &nodes[i]; - i += numParityNodes; - qNodes = &nodes[i]; - i += numParityNodes; - if (lu_flag) { - unlockQNodes = &nodes[i]; - i += numParityNodes; - } else { - unlockQNodes = NULL; - } - } else { - readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; - } - RF_ASSERT(i == totalNumNodes); - - /* Step 3. initialize the nodes */ - /* initialize block node (Nil) */ - nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, (numParityNodes * nfaults) + 1, 1, 4, 0, dag_h, "Rod", allocList); - RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * data */ - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readDataNodes[i].numSuccedents; j++) - readDataNodes[i].propList[j] = NULL; - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; - i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * parity */ - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - for (j = 0; j < readParityNodes[i].numSuccedents; j++) - readParityNodes[i].propList[0] = NULL; - pda = pda->next; - } - - /* initialize nodes which read old Q (Roq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); - readQNodes[i].params[0].p = pda; - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old Q */ - readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - for (j = 0; j < readQNodes[i].numSuccedents; j++) - readQNodes[i].propList[0] = NULL; - pda = pda->next; - } - } - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new - * data to be written */ - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - - - /* initialize nodes which compute new parity and Q */ - /* we use the simple XOR func in the double-XOR case, and when we're - * accessing only a portion of one stripe unit. the distinction - * between the two is that the regular XOR func assumes that the - * targbuf is a full SU in size, and examines the pda associated with - * the buffer to decide where within the buffer to XOR the data, - * whereas the simple XOR func just XORs the data into the start of - * the buffer. */ - if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } - } else { - func = pfuncs->regular; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->RegularName; - if (qfuncs) { - qfunc = qfuncs->regular; - qname = qfuncs->RegularName; - } - } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} - * nodes, and raidPtr */ - if (numParityNodes == 2) { /* double-xor case */ - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for - * xor */ - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as - * target buf */ - if (nfaults == 2) { - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, qname, allocList); /* no wakeup func for - * xor */ - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; - qNodes[i].params[2] = readQNodes[i].params[0]; - qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; - qNodes[i].params[6].p = raidPtr; - qNodes[i].results[0] = readQNodes[i].params[1].p; /* use old Q buf as - * target buf */ - } - } - } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - if (nfaults == 2) { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, qname, allocList); - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - /* and read old q */ - qNodes[0].params[2 * numDataNodes + 0] = readQNodes[0].params[0]; /* pda */ - qNodes[0].params[2 * numDataNodes + 1] = readQNodes[0].params[1]; /* buffer pointer */ - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd nodes */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ - qNodes[0].results[0] = readQNodes[0].params[1].p; - } - } - - /* initialize nodes which write new parity (Wnp) */ - pda = asmap->parityInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnp", allocList); - RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* initialize nodes which write new Q (Wnq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnq", allocList); - RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ - writeQNodes[i].params[2].v = parityStripeID; - writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - } - /* Step 4. connect the nodes */ - - /* connect header to block node */ - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0] = blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old Q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; - RF_ASSERT(readQNodes[i].numAntecedents == 1); - readQNodes[i].antecedents[0] = blockNode; - readQNodes[i].antType[0] = rf_control; - } - - /* connect read old data nodes to write new data nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == ((nfaults * numParityNodes) + 1)); - RF_ASSERT(writeDataNodes[i].numAntecedents == 1); - readDataNodes[i].succedents[0] = &writeDataNodes[i]; - writeDataNodes[i].antecedents[0] = &readDataNodes[i]; - writeDataNodes[i].antType[0] = rf_antiData; - } - - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[1 + j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - } - - /* connect read old data nodes to q nodes */ - if (nfaults == 2) - for (i = 0; i < numDataNodes; i++) - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[1 + numParityNodes + j] = &qNodes[j]; - qNodes[j].antecedents[i] = &readDataNodes[i]; - qNodes[j].antType[i] = rf_trueData; - } - - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect read old q nodes to q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readQNodes[i].numSuccedents == numParityNodes); - readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect xor nodes to the write new parity nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeParityNodes[i].numAntecedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numSuccedents == numParityNodes); - xorNodes[i].succedents[j] = &writeParityNodes[j]; - writeParityNodes[j].antecedents[i] = &xorNodes[i]; - writeParityNodes[j].antType[i] = rf_trueData; - } - } - - /* connect q nodes to the write new q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeQNodes[i].numAntecedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numSuccedents == 1); - qNodes[i].succedents[j] = &writeQNodes[j]; - writeQNodes[j].antecedents[i] = &qNodes[i]; - writeQNodes[j].antType[i] = rf_trueData; - } - } - - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - unlockDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &unlockDataNodes[i]; - termNode->antType[i] = rf_control; - } else { - /* connect write new data nodes to term node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &writeDataNodes[i]; - termNode->antType[i] = rf_control; - } - } - - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; - unlockParityNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); - unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } else { - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - } - - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - RF_ASSERT(unlockQNodes[i].numAntecedents == 1); - writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; - unlockQNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockQNodes[i].numSuccedents == 1); - unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } else { - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - } -} - - - -/****************************************************************************** - * create a write graph (fault-free or degraded) for RAID level 1 - * - * Hdr Nil -> Wpd -> Nil -> Trm - * Nil -> Wsd -> - * - * The "Wpd" node writes data to the primary copy in the mirror pair - * The "Wsd" node writes data to the secondary copy in the mirror pair - * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - *****************************************************************************/ - -void -rf_CreateRaidOneWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) -{ - RF_DagNode_t *blockNode, *unblockNode, *termNode; - RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda, *pdaP; - RF_StripeNum_t parityStripeID; - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 write DAG]\n"); - } - nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; /* 2 implies access not - * SU aligned */ - nWndNodes = (asmap->physInfo->next) ? 2 : 1; - - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 1) - nWndNodes--; - if (asmap->numParityFailed == 1) - nWmirNodes--; - - /* total number of nodes = nWndNodes + nWmirNodes + (block + unblock + - * terminator) */ - RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNode = &nodes[i]; - i += nWndNodes; - wmirNode = &nodes[i]; - i += nWmirNodes; - blockNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - - /* this dag can commit immediately */ - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the unblock and term nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ - if (nWndNodes > 0) { - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); - RF_ASSERT(pda != NULL); - wndNode[i].params[0].p = pda; - wndNode[i].params[1].p = pda->bufPtr; - wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - RF_ASSERT(pda == NULL); - } - /* initialize the mirror nodes */ - if (nWmirNodes > 0) { - pda = asmap->physInfo; - pdaP = asmap->parityInfo; - for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); - RF_ASSERT(pda != NULL); - wmirNode[i].params[0].p = pdaP; - wmirNode[i].params[1].p = pda->bufPtr; - wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - pdaP = pdaP->next; - } - RF_ASSERT(pda == NULL); - RF_ASSERT(pdaP == NULL); - } - /* link the header node to the block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* link the block node to the write nodes */ - RF_ASSERT(blockNode->numSuccedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numAntecedents == 1); - blockNode->succedents[i] = &wndNode[i]; - wndNode[i].antecedents[0] = blockNode; - wndNode[i].antType[0] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numAntecedents == 1); - blockNode->succedents[i + nWndNodes] = &wmirNode[i]; - wmirNode[i].antecedents[0] = blockNode; - wmirNode[i].antType[0] = rf_control; - } - - /* link the write nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numSuccedents == 1); - wndNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNode[i]; - unblockNode->antType[i] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numSuccedents == 1); - wmirNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; - unblockNode->antType[i + nWndNodes] = rf_control; - } - - /* link the unblock node to the term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - - return; -} diff --git a/sys/dev/raidframe/rf_dagffwr.h b/sys/dev/raidframe/rf_dagffwr.h deleted file mode 100644 index f65875e..0000000 --- a/sys/dev/raidframe/rf_dagffwr.h +++ /dev/null @@ -1,77 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagffwr.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DAGFFWR_H_ -#define _RF__RF_DAGFFWR_H_ - -#include <dev/raidframe/rf_types.h> - -/* fault-free write DAG creation routines */ -void -rf_CreateNonRedundantWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_IoType_t type); -void -rf_CreateRAID0WriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, RF_IoType_t type); -void -rf_CreateSmallWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CreateLargeWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CommonCreateLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, - int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, - int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - void rf_CommonCreateSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - void rf_CreateRaidOneWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); - void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); - -#endif /* !_RF__RF_DAGFFWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagflags.h b/sys/dev/raidframe/rf_dagflags.h deleted file mode 100644 index b0777bd..0000000 --- a/sys/dev/raidframe/rf_dagflags.h +++ /dev/null @@ -1,68 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagflags.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************************** - * - * dagflags.h -- flags that can be given to DoAccess - * I pulled these out of dag.h because routines that call DoAccess may need these flags, - * but certainly do not need the declarations related to the DAG data structures. - * - **************************************************************************************/ - - -#ifndef _RF__RF_DAGFLAGS_H_ -#define _RF__RF_DAGFLAGS_H_ - -/* - * Bitmasks for the "flags" parameter (RF_RaidAccessFlags_t) used - * by DoAccess, SelectAlgorithm, and the DAG creation routines. - * - * If USE_DAG or USE_ASM is specified, neither the DAG nor the ASM - * will be modified, which means that you can't SUPRESS if you - * specify USE_DAG. - */ - -#define RF_DAG_FLAGS_NONE 0 /* no flags */ -#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* supress all stripe locks in - * the DAG */ -#define RF_DAG_RETURN_ASM (1<<1) /* create an ASM and return it - * instead of freeing it */ -#define RF_DAG_RETURN_DAG (1<<2) /* create a DAG and return it - * instead of freeing it */ -#define RF_DAG_NONBLOCKING_IO (1<<3) /* cause DoAccess to be - * non-blocking */ -#define RF_DAG_ACCESS_COMPLETE (1<<4) /* the access is complete */ -#define RF_DAG_DISPATCH_RETURNED (1<<5) /* used to handle the case - * where the dag invokes no - * I/O */ -#define RF_DAG_TEST_ACCESS (1<<6) /* this access came through - * rf_ioctl instead of - * rf_strategy */ - -#endif /* !_RF__RF_DAGFLAGS_H_ */ diff --git a/sys/dev/raidframe/rf_dagfuncs.c b/sys/dev/raidframe/rf_dagfuncs.c deleted file mode 100644 index a1ba8150..0000000 --- a/sys/dev/raidframe/rf_dagfuncs.c +++ /dev/null @@ -1,906 +0,0 @@ -/* $NetBSD: rf_dagfuncs.c,v 1.7 2001/02/03 12:51:10 mrg Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * dagfuncs.c -- DAG node execution routines - * - * Rules: - * 1. Every DAG execution function must eventually cause node->status to - * get set to "good" or "bad", and "FinishNode" to be called. In the - * case of nodes that complete immediately (xor, NullNodeFunc, etc), - * the node execution function can do these two things directly. In - * the case of nodes that have to wait for some event (a disk read to - * complete, a lock to be released, etc) to occur before they can - * complete, this is typically achieved by having whatever module - * is doing the operation call GenericWakeupFunc upon completion. - * 2. DAG execution functions should check the status in the DAG header - * and NOP out their operations if the status is not "enable". However, - * execution functions that release resources must be sure to release - * them even when they NOP out the function that would use them. - * Functions that acquire resources should go ahead and acquire them - * even when they NOP, so that a downstream release node will not have - * to check to find out whether or not the acquire was suppressed. - */ - -#include <sys/param.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_dagutils.h> - -#include <dev/raidframe/rf_kintf.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 -#include <dev/raidframe/rf_paritylog.h> -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int (*rf_DiskReadFunc) (RF_DagNode_t *); -int (*rf_DiskWriteFunc) (RF_DagNode_t *); -int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); -int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); -int (*rf_DiskUnlockFunc) (RF_DagNode_t *); -int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); -int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); -int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); -int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); - -/***************************************************************************************** - * main (only) configuration routine for this module - ****************************************************************************************/ -int -rf_ConfigureDAGFuncs(listp) - RF_ShutdownList_t **listp; -{ - RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); - rf_DiskReadFunc = rf_DiskReadFuncForThreads; - rf_DiskReadUndoFunc = rf_DiskUndoFunc; - rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; - rf_DiskWriteUndoFunc = rf_DiskUndoFunc; - rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads; - rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc; - rf_RegularXorUndoFunc = rf_NullNodeUndoFunc; - rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc; - rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc; - return (0); -} - - - -/***************************************************************************************** - * the execution function associated with a terminate node - ****************************************************************************************/ -int -rf_TerminateFunc(node) - RF_DagNode_t *node; -{ - RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); - node->status = rf_good; - return (rf_FinishNode(node, RF_THREAD_CONTEXT)); -} - -int -rf_TerminateUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} - - -/***************************************************************************************** - * execution functions associated with a mirror node - * - * parameters: - * - * 0 - physical disk addres of data - * 1 - buffer for holding read data - * 2 - parity stripe ID - * 3 - flags - * 4 - physical disk address of mirror (parity) - * - ****************************************************************************************/ - -int -rf_DiskReadMirrorIdleFunc(node) - RF_DagNode_t *node; -{ - /* select the mirror copy with the shortest queue and fill in node - * parameters with physical disk address */ - - rf_SelectMirrorDiskIdle(node); - return (rf_DiskReadFunc(node)); -} - -int -rf_DiskReadMirrorPartitionFunc(node) - RF_DagNode_t *node; -{ - /* select the mirror copy with the shortest queue and fill in node - * parameters with physical disk address */ - - rf_SelectMirrorDiskPartition(node); - return (rf_DiskReadFunc(node)); -} - -int -rf_DiskReadMirrorUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} - - - -#if RF_INCLUDE_PARITYLOGGING > 0 -/***************************************************************************************** - * the execution function associated with a parity log update node - ****************************************************************************************/ -int -rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; -{ - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_ParityLogData_t *logData; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - logData = rf_CreateParityLogData(RF_UPDATE, pda, buf, - (RF_Raid_t *) (node->dagHdr->raidPtr), - node->wakeFunc, (void *) node, - node->dagHdr->tracerec, timer); - if (logData) - rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); - else { - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->plog_us += RF_ETIMER_VAL_US(timer); - (node->wakeFunc) (node, ENOMEM); - } - } - return (0); -} - - -/***************************************************************************************** - * the execution function associated with a parity log overwrite node - ****************************************************************************************/ -int -rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; -{ - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_ParityLogData_t *logData; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr), - node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer); - if (logData) - rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); - else { - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->plog_us += RF_ETIMER_VAL_US(timer); - (node->wakeFunc) (node, ENOMEM); - } - } - return (0); -} -#else /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int -rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; -{ - return (0); -} -int -rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; -{ - return (0); -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int -rf_ParityLogUpdateUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} - -int -rf_ParityLogOverwriteUndoFunc(node) - RF_DagNode_t *node; -{ - return (0); -} -/***************************************************************************************** - * the execution function associated with a NOP node - ****************************************************************************************/ -int -rf_NullNodeFunc(node) - RF_DagNode_t *node; -{ - node->status = rf_good; - return (rf_FinishNode(node, RF_THREAD_CONTEXT)); -} - -int -rf_NullNodeUndoFunc(node) - RF_DagNode_t *node; -{ - node->status = rf_undone; - return (rf_FinishNode(node, RF_THREAD_CONTEXT)); -} - - -/***************************************************************************************** - * the execution function associated with a disk-read node - ****************************************************************************************/ -int -rf_DiskReadFuncForThreads(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; - unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); - unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); - RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; - -#if defined(__NetBSD__) - if (node->dagHdr->bp) - b_proc = (void *) ((RF_Buf_t) node->dagHdr->bp)->b_proc; -#endif - - RF_ASSERT(!(lock && unlock)); - flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; - - req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, - buf, parityStripeID, which_ru, - (int (*) (void *, int)) node->wakeFunc, - node, NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), flags, b_proc); - if (!req) { - (node->wakeFunc) (node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); - } - return (0); -} - - -/***************************************************************************************** - * the execution function associated with a disk-write node - ****************************************************************************************/ -int -rf_DiskWriteFuncForThreads(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; - unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); - unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); - RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; - -#if defined(__NetBSD__) - if (node->dagHdr->bp) - b_proc = (void *) ((RF_Buf_t) node->dagHdr->bp)->b_proc; -#endif - - /* normal processing (rollaway or forward recovery) begins here */ - RF_ASSERT(!(lock && unlock)); - flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; - req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, - buf, parityStripeID, which_ru, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, NULL, - node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - flags, b_proc); - - if (!req) { - (node->wakeFunc) (node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); - } - - return (0); -} -/***************************************************************************************** - * the undo function for disk nodes - * Note: this is not a proper undo of a write node, only locks are released. - * old data is not restored to disk! - ****************************************************************************************/ -int -rf_DiskUndoFunc(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - RF_UNLOCK_DISK_QUEUE, NULL); - if (!req) - (node->wakeFunc) (node, ENOMEM); - else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); - } - - return (0); -} -/***************************************************************************************** - * the execution function associated with an "unlock disk queue" node - ****************************************************************************************/ -int -rf_DiskUnlockFuncForThreads(node) - RF_DagNode_t *node; -{ - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - RF_UNLOCK_DISK_QUEUE, NULL); - if (!req) - (node->wakeFunc) (node, ENOMEM); - else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); - } - - return (0); -} -/***************************************************************************************** - * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes, - * the routine is called to set the node status and inform the execution engine that - * the node has fired. - ****************************************************************************************/ -int -rf_GenericWakeupFunc(node, status) - RF_DagNode_t *node; - int status; -{ - switch (node->status) { - case rf_bwd1: - node->status = rf_bwd2; - if (node->dagFuncData) - rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); - return (rf_DiskWriteFuncForThreads(node)); - break; - case rf_fired: - if (status) - node->status = rf_bad; - else - node->status = rf_good; - break; - case rf_recover: - /* probably should never reach this case */ - if (status) - node->status = rf_panic; - else - node->status = rf_undone; - break; - default: - printf("rf_GenericWakeupFunc:"); - printf("node->status is %d,", node->status); - printf("status is %d \n", status); - RF_PANIC(); - break; - } - if (node->dagFuncData) - rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); - return (rf_FinishNode(node, RF_INTR_CONTEXT)); -} - - -/***************************************************************************************** - * there are three distinct types of xor nodes - * A "regular xor" is used in the fault-free case where the access spans a complete - * stripe unit. It assumes that the result buffer is one full stripe unit in size, - * and uses the stripe-unit-offset values that it computes from the PDAs to determine - * where within the stripe unit to XOR each argument buffer. - * - * A "simple xor" is used in the fault-free case where the access touches only a portion - * of one (or two, in some cases) stripe unit(s). It assumes that all the argument - * buffers are of the same size and have the same stripe unit offset. - * - * A "recovery xor" is used in the degraded-mode case. It's similar to the regular - * xor function except that it takes the failed PDA as an additional parameter, and - * uses it to determine what portions of the argument buffers need to be xor'd into - * the result buffer, and where in the result buffer they should go. - ****************************************************************************************/ - -/* xor the params together and store the result in the result field. - * assume the result field points to a buffer that is the size of one SU, - * and use the pda params to determine where within the buffer to XOR - * the input buffers. - */ -int -rf_RegularXorFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - int i, retcode; - - retcode = 0; - if (node->dagHdr->status == rf_enable) { - /* don't do the XOR if the input is the same as the output */ - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 1; i += 2) - if (node->params[i + 1].p != node->results[0]) { - retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, - (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ -} -/* xor the inputs into the result buffer, ignoring placement issues */ -int -rf_SimpleXorFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - int i, retcode = 0; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - /* don't do the XOR if the input is the same as the output */ - for (i = 0; i < node->numParams - 1; i += 2) - if (node->params[i + 1].p != node->results[0]) { - retcode = rf_bxor((char *)node->params[i + 1].p, - (char *)node->results[0], - rf_RaidAddressToByte(raidPtr, - ((RF_PhysDiskAddr_t *)node->params[i].p)-> - numSector), (RF_Buf_t)node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ -} -/* this xor is used by the degraded-mode dag functions to recover lost data. - * the second-to-last parameter is the PDA for the failed portion of the access. - * the code here looks at this PDA and assumes that the xor target buffer is - * equal in size to the number of sectors in the failed PDA. It then uses - * the other PDAs in the parameter list to determine where within the target - * buffer the corresponding data should be xored. - */ -int -rf_RecoveryXorFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - int i, retcode = 0; - RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) - if (node->params[i + 1].p != node->results[0]) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i + 1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, retcode)); -} -/***************************************************************************************** - * The next three functions are utilities used by the above xor-execution functions. - ****************************************************************************************/ - - -/* - * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit - * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the - * access described by pda is one SU in size (which by implication means it's SU-aligned), - * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one - * SU in size the XOR occurs on only the portion of targbuf identified in the pda. - */ - -int -rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - char *srcbuf; - char *targbuf; - void *bp; -{ - char *targptr; - int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int SUOffset = pda->startSector % sectPerSU; - int length, retcode = 0; - - RF_ASSERT(pda->numSector <= sectPerSU); - - targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); - length = rf_RaidAddressToByte(raidPtr, pda->numSector); - retcode = rf_bxor(srcbuf, targptr, length, bp); - return (retcode); -} -/* it really should be the case that the buffer pointers (returned by malloc) - * are aligned to the natural word size of the machine, so this is the only - * case we optimize for. The length should always be a multiple of the sector - * size, so there should be no problem with leftover bytes at the end. - */ -int -rf_bxor(src, dest, len, bp) - char *src; - char *dest; - int len; - void *bp; -{ - unsigned mask = sizeof(long) - 1, retcode = 0; - - if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { - retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp); - } else { - RF_ASSERT(0); - } - return (retcode); -} -/* map a user buffer into kernel space, if necessary */ -#define REMAP_VA(_bp,x,y) (y) = (x) - -/* When XORing in kernel mode, we need to map each user page to kernel space before we can access it. - * We don't want to assume anything about which input buffers are in kernel/user - * space, nor about their alignment, so in each loop we compute the maximum number - * of bytes that we can xor without crossing any page boundaries, and do only this many - * bytes before the next remap. - */ -int -rf_longword_bxor(src, dest, len, bp) - unsigned long *src; - unsigned long *dest; - int len; /* longwords */ - void *bp; -{ - unsigned long *end = src + len; - unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ - unsigned long *pg_src, *pg_dest; /* per-page source/dest - * pointers */ - int longs_this_time;/* # longwords to xor in the current iteration */ - - REMAP_VA(bp, src, pg_src); - REMAP_VA(bp, dest, pg_dest); - if (!pg_src || !pg_dest) - return (EFAULT); - - while (len >= 4) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ - src += longs_this_time; - dest += longs_this_time; - len -= longs_this_time; - while (longs_this_time >= 4) { - d0 = pg_dest[0]; - d1 = pg_dest[1]; - d2 = pg_dest[2]; - d3 = pg_dest[3]; - s0 = pg_src[0]; - s1 = pg_src[1]; - s2 = pg_src[2]; - s3 = pg_src[3]; - pg_dest[0] = d0 ^ s0; - pg_dest[1] = d1 ^ s1; - pg_dest[2] = d2 ^ s2; - pg_dest[3] = d3 ^ s3; - pg_src += 4; - pg_dest += 4; - longs_this_time -= 4; - } - while (longs_this_time > 0) { /* cannot cross any page - * boundaries here */ - *pg_dest++ ^= *pg_src++; - longs_this_time--; - } - - /* either we're done, or we've reached a page boundary on one - * (or possibly both) of the pointers */ - if (len) { - if (RF_PAGE_ALIGNED(src)) - REMAP_VA(bp, src, pg_src); - if (RF_PAGE_ALIGNED(dest)) - REMAP_VA(bp, dest, pg_dest); - if (!pg_src || !pg_dest) - return (EFAULT); - } - } - while (src < end) { - *pg_dest++ ^= *pg_src++; - src++; - dest++; - len--; - if (RF_PAGE_ALIGNED(src)) - REMAP_VA(bp, src, pg_src); - if (RF_PAGE_ALIGNED(dest)) - REMAP_VA(bp, dest, pg_dest); - } - RF_ASSERT(len == 0); - return (0); -} - - -/* - dst = a ^ b ^ c; - a may equal dst - see comment above longword_bxor -*/ -int -rf_longword_bxor3(dst, a, b, c, len, bp) - unsigned long *dst; - unsigned long *a; - unsigned long *b; - unsigned long *c; - int len; /* length in longwords */ - void *bp; -{ - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest - * pointers */ - int longs_this_time;/* # longs to xor in the current iteration */ - char dst_is_a = 0; - - REMAP_VA(bp, a, pg_a); - REMAP_VA(bp, b, pg_b); - REMAP_VA(bp, c, pg_c); - if (a == dst) { - pg_dst = pg_a; - dst_is_a = 1; - } else { - REMAP_VA(bp, dst, pg_dst); - } - - /* align dest to cache line. Can't cross a pg boundary on dst here. */ - while ((((unsigned long) pg_dst) & 0x1f)) { - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - dst++; - a++; - b++; - c++; - if (RF_PAGE_ALIGNED(a)) { - REMAP_VA(bp, a, pg_a); - if (!pg_a) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(b)) { - REMAP_VA(bp, a, pg_b); - if (!pg_b) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(c)) { - REMAP_VA(bp, a, pg_c); - if (!pg_c) - return (EFAULT); - } - len--; - } - - while (len > 4) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); - a += longs_this_time; - b += longs_this_time; - c += longs_this_time; - dst += longs_this_time; - len -= longs_this_time; - while (longs_this_time >= 4) { - a0 = pg_a[0]; - longs_this_time -= 4; - - a1 = pg_a[1]; - a2 = pg_a[2]; - - a3 = pg_a[3]; - pg_a += 4; - - b0 = pg_b[0]; - b1 = pg_b[1]; - - b2 = pg_b[2]; - b3 = pg_b[3]; - /* start dual issue */ - a0 ^= b0; - b0 = pg_c[0]; - - pg_b += 4; - a1 ^= b1; - - a2 ^= b2; - a3 ^= b3; - - b1 = pg_c[1]; - a0 ^= b0; - - b2 = pg_c[2]; - a1 ^= b1; - - b3 = pg_c[3]; - a2 ^= b2; - - pg_dst[0] = a0; - a3 ^= b3; - pg_dst[1] = a1; - pg_c += 4; - pg_dst[2] = a2; - pg_dst[3] = a3; - pg_dst += 4; - } - while (longs_this_time > 0) { /* cannot cross any page - * boundaries here */ - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - longs_this_time--; - } - - if (len) { - if (RF_PAGE_ALIGNED(a)) { - REMAP_VA(bp, a, pg_a); - if (!pg_a) - return (EFAULT); - if (dst_is_a) - pg_dst = pg_a; - } - if (RF_PAGE_ALIGNED(b)) { - REMAP_VA(bp, b, pg_b); - if (!pg_b) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(c)) { - REMAP_VA(bp, c, pg_c); - if (!pg_c) - return (EFAULT); - } - if (!dst_is_a) - if (RF_PAGE_ALIGNED(dst)) { - REMAP_VA(bp, dst, pg_dst); - if (!pg_dst) - return (EFAULT); - } - } - } - while (len) { - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - dst++; - a++; - b++; - c++; - if (RF_PAGE_ALIGNED(a)) { - REMAP_VA(bp, a, pg_a); - if (!pg_a) - return (EFAULT); - if (dst_is_a) - pg_dst = pg_a; - } - if (RF_PAGE_ALIGNED(b)) { - REMAP_VA(bp, b, pg_b); - if (!pg_b) - return (EFAULT); - } - if (RF_PAGE_ALIGNED(c)) { - REMAP_VA(bp, c, pg_c); - if (!pg_c) - return (EFAULT); - } - if (!dst_is_a) - if (RF_PAGE_ALIGNED(dst)) { - REMAP_VA(bp, dst, pg_dst); - if (!pg_dst) - return (EFAULT); - } - len--; - } - return (0); -} - -int -rf_bxor3(dst, a, b, c, len, bp) - unsigned char *dst; - unsigned char *a; - unsigned char *b; - unsigned char *c; - unsigned long len; - void *bp; -{ - RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); - - return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, - (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); -} diff --git a/sys/dev/raidframe/rf_dagfuncs.h b/sys/dev/raidframe/rf_dagfuncs.h deleted file mode 100644 index da7e8b2..0000000 --- a/sys/dev/raidframe/rf_dagfuncs.h +++ /dev/null @@ -1,90 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagfuncs.h,v 1.4 2000/03/30 13:39:07 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * dagfuncs.h -- header file for DAG node execution routines - * - ****************************************************************************************/ - -#ifndef _RF__RF_DAGFUNCS_H_ -#define _RF__RF_DAGFUNCS_H_ - -int rf_ConfigureDAGFuncs(RF_ShutdownList_t ** listp); -int rf_TerminateFunc(RF_DagNode_t * node); -int rf_TerminateUndoFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorIdleFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorUndoFunc(RF_DagNode_t * node); -int rf_ParityLogUpdateFunc(RF_DagNode_t * node); -int rf_ParityLogOverwriteFunc(RF_DagNode_t * node); -int rf_ParityLogUpdateUndoFunc(RF_DagNode_t * node); -int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t * node); -int rf_NullNodeFunc(RF_DagNode_t * node); -int rf_NullNodeUndoFunc(RF_DagNode_t * node); -int rf_DiskReadFuncForThreads(RF_DagNode_t * node); -int rf_DiskWriteFuncForThreads(RF_DagNode_t * node); -int rf_DiskUndoFunc(RF_DagNode_t * node); -int rf_DiskUnlockFuncForThreads(RF_DagNode_t * node); -int rf_GenericWakeupFunc(RF_DagNode_t * node, int status); -int rf_RegularXorFunc(RF_DagNode_t * node); -int rf_SimpleXorFunc(RF_DagNode_t * node); -int rf_RecoveryXorFunc(RF_DagNode_t * node); -int -rf_XorIntoBuffer(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, char *srcbuf, - char *targbuf, void *bp); -int rf_bxor(char *src, char *dest, int len, void *bp); -int -rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp); -int -rf_longword_bxor3(unsigned long *dest, unsigned long *a, unsigned long *b, - unsigned long *c, int len, void *bp); -int -rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, - unsigned char *c, unsigned long len, void *bp); - -/* function ptrs defined in ConfigureDAGFuncs() */ -extern int (*rf_DiskReadFunc) (RF_DagNode_t *); -extern int (*rf_DiskWriteFunc) (RF_DagNode_t *); -extern int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); -extern int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); -extern int (*rf_DiskUnlockFunc) (RF_DagNode_t *); -extern int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); -extern int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); -extern int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); -extern int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); - -/* macros for manipulating the param[3] in a read or write node */ -#define RF_CREATE_PARAM3(pri, lk, unlk, wru) (((RF_uint64)(((wru&0xFFFFFF)<<8)|((lk)?0x10:0)|((unlk)?0x20:0)|((pri)&0xF)) )) -#define RF_EXTRACT_PRIORITY(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 0) & 0x0F) -#define RF_EXTRACT_LOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 4) & 0x1) -#define RF_EXTRACT_UNLOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1) -#define RF_EXTRACT_RU(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF) - -#endif /* !_RF__RF_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_dagutils.c b/sys/dev/raidframe/rf_dagutils.c deleted file mode 100644 index c961870..0000000 --- a/sys/dev/raidframe/rf_dagutils.c +++ /dev/null @@ -1,1299 +0,0 @@ -/* $NetBSD: rf_dagutils.c,v 1.6 1999/12/09 02:26:09 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Mark Holland, William V. Courtright II, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * - * rf_dagutils.c -- utility routines for manipulating dags - * - *****************************************************************************/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_shutdown.h> - -#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_))) - -RF_RedFuncs_t rf_xorFuncs = { - rf_RegularXorFunc, "Reg Xr", -rf_SimpleXorFunc, "Simple Xr"}; - -RF_RedFuncs_t rf_xorRecoveryFuncs = { - rf_RecoveryXorFunc, "Recovery Xr", -rf_RecoveryXorFunc, "Recovery Xr"}; - -static void rf_RecurPrintDAG(RF_DagNode_t *, int, int); -static void rf_PrintDAG(RF_DagHeader_t *); -static int -rf_ValidateBranch(RF_DagNode_t *, int *, int *, - RF_DagNode_t **, int); -static void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int); -static void rf_ValidateVisitedBits(RF_DagHeader_t *); - -/****************************************************************************** - * - * InitNode - initialize a dag node - * - * the size of the propList array is always the same as that of the - * successors array. - * - *****************************************************************************/ -void -rf_InitNode( - RF_DagNode_t * node, - RF_NodeStatus_t initstatus, - int commit, - int (*doFunc) (RF_DagNode_t * node), - int (*undoFunc) (RF_DagNode_t * node), - int (*wakeFunc) (RF_DagNode_t * node, int status), - int nSucc, - int nAnte, - int nParam, - int nResult, - RF_DagHeader_t * hdr, - char *name, - RF_AllocListElem_t * alist) -{ - void **ptrs; - int nptrs; - - if (nAnte > RF_MAX_ANTECEDENTS) - RF_PANIC(); - node->status = initstatus; - node->commitNode = commit; - node->doFunc = doFunc; - node->undoFunc = undoFunc; - node->wakeFunc = wakeFunc; - node->numParams = nParam; - node->numResults = nResult; - node->numAntecedents = nAnte; - node->numAntDone = 0; - node->next = NULL; - node->numSuccedents = nSucc; - node->name = name; - node->dagHdr = hdr; - node->visited = 0; - - /* allocate all the pointers with one call to malloc */ - nptrs = nSucc + nAnte + nResult + nSucc; - - if (nptrs <= RF_DAG_PTRCACHESIZE) { - /* - * The dag_ptrs field of the node is basically some scribble - * space to be used here. We could get rid of it, and always - * allocate the range of pointers, but that's expensive. So, - * we pick a "common case" size for the pointer cache. Hopefully, - * we'll find that: - * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by - * only a little bit (least efficient case) - * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE - * (wasted memory) - */ - ptrs = (void **) node->dag_ptrs; - } else { - RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist); - } - node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL; - node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs + nSucc) : NULL; - node->results = (nResult) ? (void **) (ptrs + nSucc + nAnte) : NULL; - node->propList = (nSucc) ? (RF_PropHeader_t **) (ptrs + nSucc + nAnte + nResult) : NULL; - - if (nParam) { - if (nParam <= RF_DAG_PARAMCACHESIZE) { - node->params = (RF_DagParam_t *) node->dag_params; - } else { - RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist); - } - } else { - node->params = NULL; - } -} - - - -/****************************************************************************** - * - * allocation and deallocation routines - * - *****************************************************************************/ - -void -rf_FreeDAG(dag_h) - RF_DagHeader_t *dag_h; -{ - RF_AccessStripeMapHeader_t *asmap, *t_asmap; - RF_DagHeader_t *nextDag; - int i; - - while (dag_h) { - nextDag = dag_h->next; - for (i = 0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) { - /* release mem chunks */ - rf_ReleaseMemChunk(dag_h->memChunk[i]); - dag_h->memChunk[i] = NULL; - } - - RF_ASSERT(i == dag_h->chunkIndex); - if (dag_h->xtraChunkCnt > 0) { - /* free xtraMemChunks */ - for (i = 0; dag_h->xtraMemChunk[i] && i < dag_h->xtraChunkIndex; i++) { - rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]); - dag_h->xtraMemChunk[i] = NULL; - } - RF_ASSERT(i == dag_h->xtraChunkIndex); - /* free ptrs to xtraMemChunks */ - RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *)); - } - rf_FreeAllocList(dag_h->allocList); - for (asmap = dag_h->asmList; asmap;) { - t_asmap = asmap; - asmap = asmap->next; - rf_FreeAccessStripeMap(t_asmap); - } - rf_FreeDAGHeader(dag_h); - dag_h = nextDag; - } -} - -RF_PropHeader_t * -rf_MakePropListEntry( - RF_DagHeader_t * dag_h, - int resultNum, - int paramNum, - RF_PropHeader_t * next, - RF_AllocListElem_t * allocList) -{ - RF_PropHeader_t *p; - - RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), - (RF_PropHeader_t *), allocList); - p->resultNum = resultNum; - p->paramNum = paramNum; - p->next = next; - return (p); -} - -static RF_FreeList_t *rf_dagh_freelist; - -#define RF_MAX_FREE_DAGH 128 -#define RF_DAGH_INC 16 -#define RF_DAGH_INITIAL 32 - -static void rf_ShutdownDAGs(void *); -static void -rf_ShutdownDAGs(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_dagh_freelist, next, (RF_DagHeader_t *)); -} - -int -rf_ConfigureDAGs(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH, - RF_DAGH_INC, sizeof(RF_DagHeader_t)); - if (rf_dagh_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownDAGs(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL, next, - (RF_DagHeader_t *)); - return (0); -} - -RF_DagHeader_t * -rf_AllocDAGHeader() -{ - RF_DagHeader_t *dh; - - RF_FREELIST_GET(rf_dagh_freelist, dh, next, (RF_DagHeader_t *)); - if (dh) { - bzero((char *) dh, sizeof(RF_DagHeader_t)); - } - return (dh); -} - -void -rf_FreeDAGHeader(RF_DagHeader_t * dh) -{ - RF_FREELIST_FREE(rf_dagh_freelist, dh, next); -} -/* allocates a buffer big enough to hold the data described by pda */ -void * -rf_AllocBuffer( - RF_Raid_t * raidPtr, - RF_DagHeader_t * dag_h, - RF_PhysDiskAddr_t * pda, - RF_AllocListElem_t * allocList) -{ - char *p; - - RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector, - (char *), allocList); - return ((void *) p); -} -/****************************************************************************** - * - * debug routines - * - *****************************************************************************/ - -char * -rf_NodeStatusString(RF_DagNode_t * node) -{ - switch (node->status) { - case rf_wait:return ("wait"); - case rf_fired: - return ("fired"); - case rf_good: - return ("good"); - case rf_bad: - return ("bad"); - default: - return ("?"); - } -} - -void -rf_PrintNodeInfoString(RF_DagNode_t * node) -{ - RF_PhysDiskAddr_t *pda; - int (*df) (RF_DagNode_t *) = node->doFunc; - int i, lk, unlk; - void *bufPtr; - - if ((df == rf_DiskReadFunc) || (df == rf_DiskWriteFunc) - || (df == rf_DiskReadMirrorIdleFunc) - || (df == rf_DiskReadMirrorPartitionFunc)) { - pda = (RF_PhysDiskAddr_t *) node->params[0].p; - bufPtr = (void *) node->params[1].p; - lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - RF_ASSERT(!(lk && unlk)); - printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, pda->col, - (long) pda->startSector, (int) pda->numSector, (long) bufPtr, - (lk) ? "LOCK" : ((unlk) ? "UNLK" : " ")); - return; - } - if (df == rf_DiskUnlockFunc) { - pda = (RF_PhysDiskAddr_t *) node->params[0].p; - lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - RF_ASSERT(!(lk && unlk)); - printf("r %d c %d %s\n", pda->row, pda->col, - (lk) ? "LOCK" : ((unlk) ? "UNLK" : "nop")); - return; - } - if ((df == rf_SimpleXorFunc) || (df == rf_RegularXorFunc) - || (df == rf_RecoveryXorFunc)) { - printf("result buf 0x%lx\n", (long) node->results[0]); - for (i = 0; i < node->numParams - 1; i += 2) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p; - printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n", - (long) bufPtr, pda->row, pda->col, - (long) pda->startSector, (int) pda->numSector); - } - return; - } -#if RF_INCLUDE_PARITYLOGGING > 0 - if (df == rf_ParityLogOverwriteFunc || df == rf_ParityLogUpdateFunc) { - for (i = 0; i < node->numParams - 1; i += 2) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p; - printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n", - pda->row, pda->col, (long) pda->startSector, - (int) pda->numSector, (long) bufPtr); - } - return; - } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - - if ((df == rf_TerminateFunc) || (df == rf_NullNodeFunc)) { - printf("\n"); - return; - } - printf("?\n"); -} - -static void -rf_RecurPrintDAG(node, depth, unvisited) - RF_DagNode_t *node; - int depth; - int unvisited; -{ - char *anttype; - int i; - - node->visited = (unvisited) ? 0 : 1; - printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth, - node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node), - node->numSuccedents, node->numSuccFired, node->numSuccDone, - node->numAntecedents, node->numAntDone, node->numParams, node->numResults); - for (i = 0; i < node->numSuccedents; i++) { - printf("%d%s", node->succedents[i]->nodeNum, - ((i == node->numSuccedents - 1) ? "\0" : " ")); - } - printf("} A{"); - for (i = 0; i < node->numAntecedents; i++) { - switch (node->antType[i]) { - case rf_trueData: - anttype = "T"; - break; - case rf_antiData: - anttype = "A"; - break; - case rf_outputData: - anttype = "O"; - break; - case rf_control: - anttype = "C"; - break; - default: - anttype = "?"; - break; - } - printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, (i == node->numAntecedents - 1) ? "\0" : " "); - } - printf("}; "); - rf_PrintNodeInfoString(node); - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) - rf_RecurPrintDAG(node->succedents[i], depth + 1, unvisited); - } -} - -static void -rf_PrintDAG(dag_h) - RF_DagHeader_t *dag_h; -{ - int unvisited, i; - char *status; - - /* set dag status */ - switch (dag_h->status) { - case rf_enable: - status = "enable"; - break; - case rf_rollForward: - status = "rollForward"; - break; - case rf_rollBackward: - status = "rollBackward"; - break; - default: - status = "illegal!"; - break; - } - /* find out if visited bits are currently set or clear */ - unvisited = dag_h->succedents[0]->visited; - - printf("DAG type: %s\n", dag_h->creator); - printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n"); - printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum, - status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits); - for (i = 0; i < dag_h->numSuccedents; i++) { - printf("%d%s", dag_h->succedents[i]->nodeNum, - ((i == dag_h->numSuccedents - 1) ? "\0" : " ")); - } - printf("};\n"); - for (i = 0; i < dag_h->numSuccedents; i++) { - if (dag_h->succedents[i]->visited == unvisited) - rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited); - } -} -/* assigns node numbers */ -int -rf_AssignNodeNums(RF_DagHeader_t * dag_h) -{ - int unvisited, i, nnum; - RF_DagNode_t *node; - - nnum = 0; - unvisited = dag_h->succedents[0]->visited; - - dag_h->nodeNum = nnum++; - for (i = 0; i < dag_h->numSuccedents; i++) { - node = dag_h->succedents[i]; - if (node->visited == unvisited) { - nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited); - } - } - return (nnum); -} - -int -rf_RecurAssignNodeNums(node, num, unvisited) - RF_DagNode_t *node; - int num; - int unvisited; -{ - int i; - - node->visited = (unvisited) ? 0 : 1; - - node->nodeNum = num++; - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) { - num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited); - } - } - return (num); -} -/* set the header pointers in each node to "newptr" */ -void -rf_ResetDAGHeaderPointers(dag_h, newptr) - RF_DagHeader_t *dag_h; - RF_DagHeader_t *newptr; -{ - int i; - for (i = 0; i < dag_h->numSuccedents; i++) - if (dag_h->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr); -} - -void -rf_RecurResetDAGHeaderPointers(node, newptr) - RF_DagNode_t *node; - RF_DagHeader_t *newptr; -{ - int i; - node->dagHdr = newptr; - for (i = 0; i < node->numSuccedents; i++) - if (node->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr); -} - - -void -rf_PrintDAGList(RF_DagHeader_t * dag_h) -{ - int i = 0; - - for (; dag_h; dag_h = dag_h->next) { - rf_AssignNodeNums(dag_h); - printf("\n\nDAG %d IN LIST:\n", i++); - rf_PrintDAG(dag_h); - } -} - -static int -rf_ValidateBranch(node, scount, acount, nodes, unvisited) - RF_DagNode_t *node; - int *scount; - int *acount; - RF_DagNode_t **nodes; - int unvisited; -{ - int i, retcode = 0; - - /* construct an array of node pointers indexed by node num */ - node->visited = (unvisited) ? 0 : 1; - nodes[node->nodeNum] = node; - - if (node->next != NULL) { - printf("INVALID DAG: next pointer in node is not NULL\n"); - retcode = 1; - } - if (node->status != rf_wait) { - printf("INVALID DAG: Node status is not wait\n"); - retcode = 1; - } - if (node->numAntDone != 0) { - printf("INVALID DAG: numAntDone is not zero\n"); - retcode = 1; - } - if (node->doFunc == rf_TerminateFunc) { - if (node->numSuccedents != 0) { - printf("INVALID DAG: Terminator node has succedents\n"); - retcode = 1; - } - } else { - if (node->numSuccedents == 0) { - printf("INVALID DAG: Non-terminator node has no succedents\n"); - retcode = 1; - } - } - for (i = 0; i < node->numSuccedents; i++) { - if (!node->succedents[i]) { - printf("INVALID DAG: succedent %d of node %s is NULL\n", i, node->name); - retcode = 1; - } - scount[node->succedents[i]->nodeNum]++; - } - for (i = 0; i < node->numAntecedents; i++) { - if (!node->antecedents[i]) { - printf("INVALID DAG: antecedent %d of node %s is NULL\n", i, node->name); - retcode = 1; - } - acount[node->antecedents[i]->nodeNum]++; - } - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) { - if (rf_ValidateBranch(node->succedents[i], scount, - acount, nodes, unvisited)) { - retcode = 1; - } - } - } - return (retcode); -} - -static void -rf_ValidateBranchVisitedBits(node, unvisited, rl) - RF_DagNode_t *node; - int unvisited; - int rl; -{ - int i; - - RF_ASSERT(node->visited == unvisited); - for (i = 0; i < node->numSuccedents; i++) { - if (node->succedents[i] == NULL) { - printf("node=%lx node->succedents[%d] is NULL\n", (long) node, i); - RF_ASSERT(0); - } - rf_ValidateBranchVisitedBits(node->succedents[i], unvisited, rl + 1); - } -} -/* NOTE: never call this on a big dag, because it is exponential - * in execution time - */ -static void -rf_ValidateVisitedBits(dag) - RF_DagHeader_t *dag; -{ - int i, unvisited; - - unvisited = dag->succedents[0]->visited; - - for (i = 0; i < dag->numSuccedents; i++) { - if (dag->succedents[i] == NULL) { - printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i); - RF_ASSERT(0); - } - rf_ValidateBranchVisitedBits(dag->succedents[i], unvisited, 0); - } -} -/* validate a DAG. _at entry_ verify that: - * -- numNodesCompleted is zero - * -- node queue is null - * -- dag status is rf_enable - * -- next pointer is null on every node - * -- all nodes have status wait - * -- numAntDone is zero in all nodes - * -- terminator node has zero successors - * -- no other node besides terminator has zero successors - * -- no successor or antecedent pointer in a node is NULL - * -- number of times that each node appears as a successor of another node - * is equal to the antecedent count on that node - * -- number of times that each node appears as an antecedent of another node - * is equal to the succedent count on that node - * -- what else? - */ -int -rf_ValidateDAG(dag_h) - RF_DagHeader_t *dag_h; -{ - int i, nodecount; - int *scount, *acount;/* per-node successor and antecedent counts */ - RF_DagNode_t **nodes; /* array of ptrs to nodes in dag */ - int retcode = 0; - int unvisited; - int commitNodeCount = 0; - - if (rf_validateVisitedDebug) - rf_ValidateVisitedBits(dag_h); - - if (dag_h->numNodesCompleted != 0) { - printf("INVALID DAG: num nodes completed is %d, should be 0\n", dag_h->numNodesCompleted); - retcode = 1; - goto validate_dag_bad; - } - if (dag_h->status != rf_enable) { - printf("INVALID DAG: not enabled\n"); - retcode = 1; - goto validate_dag_bad; - } - if (dag_h->numCommits != 0) { - printf("INVALID DAG: numCommits != 0 (%d)\n", dag_h->numCommits); - retcode = 1; - goto validate_dag_bad; - } - if (dag_h->numSuccedents != 1) { - /* currently, all dags must have only one succedent */ - printf("INVALID DAG: numSuccedents !1 (%d)\n", dag_h->numSuccedents); - retcode = 1; - goto validate_dag_bad; - } - nodecount = rf_AssignNodeNums(dag_h); - - unvisited = dag_h->succedents[0]->visited; - - RF_Calloc(scount, nodecount, sizeof(int), (int *)); - RF_Calloc(acount, nodecount, sizeof(int), (int *)); - RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **)); - for (i = 0; i < dag_h->numSuccedents; i++) { - if ((dag_h->succedents[i]->visited == unvisited) - && rf_ValidateBranch(dag_h->succedents[i], scount, - acount, nodes, unvisited)) { - retcode = 1; - } - } - /* start at 1 to skip the header node */ - for (i = 1; i < nodecount; i++) { - if (nodes[i]->commitNode) - commitNodeCount++; - if (nodes[i]->doFunc == NULL) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); - retcode = 1; - goto validate_dag_out; - } - if (nodes[i]->undoFunc == NULL) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); - retcode = 1; - goto validate_dag_out; - } - if (nodes[i]->numAntecedents != scount[nodes[i]->nodeNum]) { - printf("INVALID DAG: node %s has %d antecedents but appears as a succedent %d times\n", - nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]); - retcode = 1; - goto validate_dag_out; - } - if (nodes[i]->numSuccedents != acount[nodes[i]->nodeNum]) { - printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n", - nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]); - retcode = 1; - goto validate_dag_out; - } - } - - if (dag_h->numCommitNodes != commitNodeCount) { - printf("INVALID DAG: incorrect commit node count. hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n", - dag_h->numCommitNodes, commitNodeCount); - retcode = 1; - goto validate_dag_out; - } -validate_dag_out: - RF_Free(scount, nodecount * sizeof(int)); - RF_Free(acount, nodecount * sizeof(int)); - RF_Free(nodes, nodecount * sizeof(RF_DagNode_t *)); - if (retcode) - rf_PrintDAGList(dag_h); - - if (rf_validateVisitedDebug) - rf_ValidateVisitedBits(dag_h); - - return (retcode); - -validate_dag_bad: - rf_PrintDAGList(dag_h); - return (retcode); -} - - -/****************************************************************************** - * - * misc construction routines - * - *****************************************************************************/ - -void -rf_redirect_asm( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap) -{ - int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 1 : 0; - int row = asmap->physInfo->row; - int fcol = raidPtr->reconControl[row]->fcol; - int srow = raidPtr->reconControl[row]->spareRow; - int scol = raidPtr->reconControl[row]->spareCol; - RF_PhysDiskAddr_t *pda; - - RF_ASSERT(raidPtr->status[row] == rf_rs_reconstructing); - for (pda = asmap->physInfo; pda; pda = pda->next) { - if (pda->col == fcol) { - if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, - pda->startSector)) { - RF_PANIC(); - } - } - /* printf("Remapped data for large write\n"); */ - if (ds) { - raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress, - &pda->row, &pda->col, &pda->startSector, RF_REMAP); - } else { - pda->row = srow; - pda->col = scol; - } - } - } - for (pda = asmap->parityInfo; pda; pda = pda->next) { - if (pda->col == fcol) { - if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) { - RF_PANIC(); - } - } - } - if (ds) { - (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - } else { - pda->row = srow; - pda->col = scol; - } - } -} - - -/* this routine allocates read buffers and generates stripe maps for the - * regions of the array from the start of the stripe to the start of the - * access, and from the end of the access to the end of the stripe. It also - * computes and returns the number of DAG nodes needed to read all this data. - * Note that this routine does the wrong thing if the access is fully - * contained within one stripe unit, so we RF_ASSERT against this case at the - * start. - */ -void -rf_MapUnaccessedPortionOfStripe( - RF_Raid_t * raidPtr, - RF_RaidLayout_t * layoutPtr,/* in: layout information */ - RF_AccessStripeMap_t * asmap, /* in: access stripe map */ - RF_DagHeader_t * dag_h, /* in: header of the dag to create */ - RF_AccessStripeMapHeader_t ** new_asm_h, /* in: ptr to array of 2 - * headers, to be filled in */ - int *nRodNodes, /* out: num nodes to be generated to read - * unaccessed data */ - char **sosBuffer, /* out: pointers to newly allocated buffer */ - char **eosBuffer, - RF_AllocListElem_t * allocList) -{ - RF_RaidAddr_t sosRaidAddress, eosRaidAddress; - RF_SectorNum_t sosNumSector, eosNumSector; - - RF_ASSERT(asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol / 2)); - /* generate an access map for the region of the array from start of - * stripe to start of access */ - new_asm_h[0] = new_asm_h[1] = NULL; - *nRodNodes = 0; - if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) { - sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - sosNumSector = asmap->raidAddress - sosRaidAddress; - RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList); - new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP); - new_asm_h[0]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[0]; - *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - - RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL); - /* we're totally within one stripe here */ - if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) - rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap); - } - /* generate an access map for the region of the array from end of - * access to end of stripe */ - if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) { - eosRaidAddress = asmap->endRaidAddress; - eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress; - RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, eosNumSector), (char *), allocList); - new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP); - new_asm_h[1]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[1]; - *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - - RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL); - /* we're totally within one stripe here */ - if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) - rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap); - } -} - - - -/* returns non-zero if the indicated ranges of stripe unit offsets overlap */ -int -rf_PDAOverlap( - RF_RaidLayout_t * layoutPtr, - RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest) -{ - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); - /* use -1 to be sure we stay within SU */ - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); - RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); - return ((RF_MAX(soffs, doffs) <= RF_MIN(send, dend)) ? 1 : 0); -} - - -/* GenerateFailedAccessASMs - * - * this routine figures out what portion of the stripe needs to be read - * to effect the degraded read or write operation. It's primary function - * is to identify everything required to recover the data, and then - * eliminate anything that is already being accessed by the user. - * - * The main result is two new ASMs, one for the region from the start of the - * stripe to the start of the access, and one for the region from the end of - * the access to the end of the stripe. These ASMs describe everything that - * needs to be read to effect the degraded access. Other results are: - * nXorBufs -- the total number of buffers that need to be XORed together to - * recover the lost data, - * rpBufPtr -- ptr to a newly-allocated buffer to hold the parity. If NULL - * at entry, not allocated. - * overlappingPDAs -- - * describes which of the non-failed PDAs in the user access - * overlap data that needs to be read to effect recovery. - * overlappingPDAs[i]==1 if and only if, neglecting the failed - * PDA, the ith pda in the input asm overlaps data that needs - * to be read for recovery. - */ - /* in: asm - ASM for the actual access, one stripe only */ - /* in: faildPDA - which component of the access has failed */ - /* in: dag_h - header of the DAG we're going to create */ - /* out: new_asm_h - the two new ASMs */ - /* out: nXorBufs - the total number of xor bufs required */ - /* out: rpBufPtr - a buffer for the parity read */ -void -rf_GenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t * failedPDA, - RF_DagHeader_t * dag_h, - RF_AccessStripeMapHeader_t ** new_asm_h, - int *nXorBufs, - char **rpBufPtr, - char *overlappingPDAs, - RF_AllocListElem_t * allocList) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - - /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */ - RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr; - - RF_SectorCount_t numSect[2], numParitySect; - RF_PhysDiskAddr_t *pda; - char *rdBuf, *bufP; - int foundit, i; - - bufP = NULL; - foundit = 0; - /* first compute the following raid addresses: start of stripe, - * (sosAddr) MIN(start of access, start of failed SU), (sosEndAddr) - * MAX(end of access, end of failed SU), (eosStartAddr) end of - * stripe (i.e. start of next stripe) (eosAddr) */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); - eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); - eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress); - - /* now generate access stripe maps for each of the above regions of - * the stripe. Use a dummy (NULL) buf ptr for now */ - - new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL, RF_DONT_REMAP) : NULL; - new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL, RF_DONT_REMAP) : NULL; - - /* walk through the PDAs and range-restrict each SU to the region of - * the SU touched on the failed PDA. also compute total data buffer - * space requirements in this step. Ignore the parity for now. */ - - numSect[0] = numSect[1] = 0; - if (new_asm_h[0]) { - new_asm_h[0]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[0]; - for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); - numSect[0] += pda->numSector; - } - } - if (new_asm_h[1]) { - new_asm_h[1]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[1]; - for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); - numSect[1] += pda->numSector; - } - } - numParitySect = failedPDA->numSector; - - /* allocate buffer space for the data & parity we have to read to - * recover from the failure */ - - if (numSect[0] + numSect[1] + ((rpBufPtr) ? numParitySect : 0)) { /* don't allocate parity - * buf if not needed */ - RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (char *), allocList); - bufP = rdBuf; - if (rf_degDagDebug) - printf("Newly allocated buffer (%d bytes) is 0x%lx\n", - (int) rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (unsigned long) bufP); - } - /* now walk through the pdas one last time and assign buffer pointers - * (ugh!). Again, ignore the parity. also, count nodes to find out - * how many bufs need to be xored together */ - (*nXorBufs) = 1; /* in read case, 1 is for parity. In write - * case, 1 is for failed data */ - if (new_asm_h[0]) { - for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { - pda->bufPtr = bufP; - bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); - } - *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - } - if (new_asm_h[1]) { - for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { - pda->bufPtr = bufP; - bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); - } - (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - } - if (rpBufPtr) - *rpBufPtr = bufP; /* the rest of the buffer is for - * parity */ - - /* the last step is to figure out how many more distinct buffers need - * to get xor'd to produce the missing unit. there's one for each - * user-data read node that overlaps the portion of the failed unit - * being accessed */ - - for (foundit = i = 0, pda = asmap->physInfo; pda; i++, pda = pda->next) { - if (pda == failedPDA) { - i--; - foundit = 1; - continue; - } - if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) { - overlappingPDAs[i] = 1; - (*nXorBufs)++; - } - } - if (!foundit) { - RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); - RF_ASSERT(0); - } - if (rf_degDagDebug) { - if (new_asm_h[0]) { - printf("First asm:\n"); - rf_PrintFullAccessStripeMap(new_asm_h[0], 1); - } - if (new_asm_h[1]) { - printf("Second asm:\n"); - rf_PrintFullAccessStripeMap(new_asm_h[1], 1); - } - } -} - - -/* adjusts the offset and number of sectors in the destination pda so that - * it covers at most the region of the SU covered by the source PDA. This - * is exclusively a restriction: the number of sectors indicated by the - * target PDA can only shrink. - * - * For example: s = sectors within SU indicated by source PDA - * d = sectors within SU indicated by dest PDA - * r = results, stored in dest PDA - * - * |--------------- one stripe unit ---------------------| - * | sssssssssssssssssssssssssssssssss | - * | ddddddddddddddddddddddddddddddddddddddddddddd | - * | rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr | - * - * Another example: - * - * |--------------- one stripe unit ---------------------| - * | sssssssssssssssssssssssssssssssss | - * | ddddddddddddddddddddddd | - * | rrrrrrrrrrrrrrrr | - * - */ -void -rf_RangeRestrictPDA( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest, - int dobuffer, - int doraidaddr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); /* use -1 to be sure we - * stay within SU */ - RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); - RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */ - - dest->startSector = subAddr + RF_MAX(soffs, doffs); - dest->numSector = subAddr + RF_MIN(send, dend) + 1 - dest->startSector; - - if (dobuffer) - dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0; - if (doraidaddr) { - dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) + - rf_StripeUnitOffset(layoutPtr, dest->startSector); - } -} -/* - * Want the highest of these primes to be the largest one - * less than the max expected number of columns (won't hurt - * to be too small or too large, but won't be optimal, either) - * --jimz - */ -#define NLOWPRIMES 8 -static int lowprimes[NLOWPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19}; -/***************************************************************************** - * compute the workload shift factor. (chained declustering) - * - * return nonzero if access should shift to secondary, otherwise, - * access is to primary - *****************************************************************************/ -int -rf_compute_workload_shift( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * pda) -{ - /* - * variables: - * d = column of disk containing primary - * f = column of failed disk - * n = number of disks in array - * sd = "shift distance" (number of columns that d is to the right of f) - * row = row of array the access is in - * v = numerator of redirection ratio - * k = denominator of redirection ratio - */ - RF_RowCol_t d, f, sd, row, n; - int k, v, ret, i; - - row = pda->row; - n = raidPtr->numCol; - - /* assign column of primary copy to d */ - d = pda->col; - - /* assign column of dead disk to f */ - for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) && (f < n)); f++); - - RF_ASSERT(f < n); - RF_ASSERT(f != d); - - sd = (f > d) ? (n + d - f) : (d - f); - RF_ASSERT(sd < n); - - /* - * v of every k accesses should be redirected - * - * v/k := (n-1-sd)/(n-1) - */ - v = (n - 1 - sd); - k = (n - 1); - -#if 1 - /* - * XXX - * Is this worth it? - * - * Now reduce the fraction, by repeatedly factoring - * out primes (just like they teach in elementary school!) - */ - for (i = 0; i < NLOWPRIMES; i++) { - if (lowprimes[i] > v) - break; - while (((v % lowprimes[i]) == 0) && ((k % lowprimes[i]) == 0)) { - v /= lowprimes[i]; - k /= lowprimes[i]; - } - } -#endif - - raidPtr->hist_diskreq[row][d]++; - if (raidPtr->hist_diskreq[row][d] > v) { - ret = 0; /* do not redirect */ - } else { - ret = 1; /* redirect */ - } - -#if 0 - printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret, - raidPtr->hist_diskreq[row][d]); -#endif - - if (raidPtr->hist_diskreq[row][d] >= k) { - /* reset counter */ - raidPtr->hist_diskreq[row][d] = 0; - } - return (ret); -} -/* - * Disk selection routines - */ - -/* - * Selects the disk with the shortest queue from a mirror pair. - * Both the disk I/Os queued in RAIDframe as well as those at the physical - * disk are counted as members of the "queue" - */ -void -rf_SelectMirrorDiskIdle(RF_DagNode_t * node) -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; - RF_RowCol_t rowData, colData, rowMirror, colMirror; - int dataQueueLength, mirrorQueueLength, usemirror; - RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p; - RF_PhysDiskAddr_t *tmp_pda; - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - - /* return the [row col] of the disk with the shortest queue */ - rowData = data_pda->row; - colData = data_pda->col; - rowMirror = mirror_pda->row; - colMirror = mirror_pda->col; - dataQueue = &(dqs[rowData][colData]); - mirrorQueue = &(dqs[rowMirror][colMirror]); - -#ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding; -#ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); - RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding; -#ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - - usemirror = 0; - if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { - usemirror = 0; - } else - if (RF_DEAD_DISK(disks[rowData][colData].status)) { - usemirror = 1; - } else - if (raidPtr->parity_good == RF_RAID_DIRTY) { - /* Trust only the main disk */ - usemirror = 0; - } else - if (dataQueueLength < mirrorQueueLength) { - usemirror = 0; - } else - if (mirrorQueueLength < dataQueueLength) { - usemirror = 1; - } else { - /* queues are equal length. attempt - * cleverness. */ - if (SNUM_DIFF(dataQueue->last_deq_sector, data_pda->startSector) - <= SNUM_DIFF(mirrorQueue->last_deq_sector, mirror_pda->startSector)) { - usemirror = 0; - } else { - usemirror = 1; - } - } - - if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ - tmp_pda = data_pda; - node->params[0].p = mirror_pda; - node->params[4].p = tmp_pda; - } else { - /* use data disk, leave param 0 unchanged */ - } - /* printf("dataQueueLength %d, mirrorQueueLength - * %d\n",dataQueueLength, mirrorQueueLength); */ -} -/* - * Do simple partitioning. This assumes that - * the data and parity disks are laid out identically. - */ -void -rf_SelectMirrorDiskPartition(RF_DagNode_t * node) -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; - RF_RowCol_t rowData, colData, rowMirror, colMirror; - RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p; - RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p; - RF_PhysDiskAddr_t *tmp_pda; - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - int usemirror; - - /* return the [row col] of the disk with the shortest queue */ - rowData = data_pda->row; - colData = data_pda->col; - rowMirror = mirror_pda->row; - colMirror = mirror_pda->col; - dataQueue = &(dqs[rowData][colData]); - mirrorQueue = &(dqs[rowMirror][colMirror]); - - usemirror = 0; - if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { - usemirror = 0; - } else - if (RF_DEAD_DISK(disks[rowData][colData].status)) { - usemirror = 1; - } else - if (raidPtr->parity_good == RF_RAID_DIRTY) { - /* Trust only the main disk */ - usemirror = 0; - } else - if (data_pda->startSector < - (disks[rowData][colData].numBlocks / 2)) { - usemirror = 0; - } else { - usemirror = 1; - } - - if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ - tmp_pda = data_pda; - node->params[0].p = mirror_pda; - node->params[4].p = tmp_pda; - } else { - /* use data disk, leave param 0 unchanged */ - } -} diff --git a/sys/dev/raidframe/rf_dagutils.h b/sys/dev/raidframe/rf_dagutils.h deleted file mode 100644 index bad2c76..0000000 --- a/sys/dev/raidframe/rf_dagutils.h +++ /dev/null @@ -1,121 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_dagutils.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************* - * - * rf_dagutils.h -- header file for utility routines for manipulating DAGs - * - *************************************************************************/ - - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> - -#ifndef _RF__RF_DAGUTILS_H_ -#define _RF__RF_DAGUTILS_H_ - -struct RF_RedFuncs_s { - int (*regular) (RF_DagNode_t *); - char *RegularName; - int (*simple) (RF_DagNode_t *); - char *SimpleName; -}; - -extern RF_RedFuncs_t rf_xorFuncs; -extern RF_RedFuncs_t rf_xorRecoveryFuncs; - -void -rf_InitNode(RF_DagNode_t * node, RF_NodeStatus_t initstatus, - int commit, - int (*doFunc) (RF_DagNode_t * node), - int (*undoFunc) (RF_DagNode_t * node), - int (*wakeFunc) (RF_DagNode_t * node, int status), - int nSucc, int nAnte, int nParam, int nResult, - RF_DagHeader_t * hdr, char *name, RF_AllocListElem_t * alist); - - void rf_FreeDAG(RF_DagHeader_t * dag_h); - - RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t * dag_h, int resultNum, - int paramNum, RF_PropHeader_t * next, RF_AllocListElem_t * allocList); - - int rf_ConfigureDAGs(RF_ShutdownList_t ** listp); - - RF_DagHeader_t *rf_AllocDAGHeader(void); - - void rf_FreeDAGHeader(RF_DagHeader_t * dh); - - void *rf_AllocBuffer(RF_Raid_t * raidPtr, RF_DagHeader_t * dag_h, - RF_PhysDiskAddr_t * pda, RF_AllocListElem_t * allocList); - - char *rf_NodeStatusString(RF_DagNode_t * node); - - void rf_PrintNodeInfoString(RF_DagNode_t * node); - - int rf_AssignNodeNums(RF_DagHeader_t * dag_h); - - int rf_RecurAssignNodeNums(RF_DagNode_t * node, int num, int unvisited); - - void rf_ResetDAGHeaderPointers(RF_DagHeader_t * dag_h, RF_DagHeader_t * newptr); - - void rf_RecurResetDAGHeaderPointers(RF_DagNode_t * node, RF_DagHeader_t * newptr); - - void rf_PrintDAGList(RF_DagHeader_t * dag_h); - - int rf_ValidateDAG(RF_DagHeader_t * dag_h); - - void rf_redirect_asm(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); - - void rf_MapUnaccessedPortionOfStripe(RF_Raid_t * raidPtr, - RF_RaidLayout_t * layoutPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - RF_AccessStripeMapHeader_t ** new_asm_h, int *nRodNodes, char **sosBuffer, - char **eosBuffer, RF_AllocListElem_t * allocList); - - int rf_PDAOverlap(RF_RaidLayout_t * layoutPtr, RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest); - - void rf_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t * failedPDA, - RF_DagHeader_t * dag_h, RF_AccessStripeMapHeader_t ** new_asm_h, - int *nXorBufs, char **rpBufPtr, char *overlappingPDAs, - RF_AllocListElem_t * allocList); - -/* flags used by RangeRestrictPDA */ -#define RF_RESTRICT_NOBUFFER 0 -#define RF_RESTRICT_DOBUFFER 1 - - void rf_RangeRestrictPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest, int dobuffer, int doraidaddr); - - int rf_compute_workload_shift(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda); - void rf_SelectMirrorDiskIdle(RF_DagNode_t * node); - void rf_SelectMirrorDiskPartition(RF_DagNode_t * node); - -#endif /* !_RF__RF_DAGUTILS_H_ */ diff --git a/sys/dev/raidframe/rf_debugMem.c b/sys/dev/raidframe/rf_debugMem.c deleted file mode 100644 index a138021..0000000 --- a/sys/dev/raidframe/rf_debugMem.c +++ /dev/null @@ -1,208 +0,0 @@ -/* $NetBSD: rf_debugMem.c,v 1.7 2000/01/07 03:40:59 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky, Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* debugMem.c: memory usage debugging stuff. - * Malloc, Calloc, and Free are #defined everywhere - * to do_malloc, do_calloc, and do_free. - * - * if RF_UTILITY is nonzero, it means were compiling one of the - * raidframe utility programs, such as rfctrl or smd. In this - * case, we eliminate all references to the threads package - * and to the allocation list stuff. - */ - -#include <dev/raidframe/rf_types.h> - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> - -#if defined(__FreeBSD__) -#include <sys/kernel.h> -MALLOC_DEFINE(M_RAIDFRAME, "rfbuf", "Buffers for RAIDframe operation"); -#endif - -static long tot_mem_in_use = 0; - -/* Hash table of information about memory allocations */ -#define RF_MH_TABLESIZE 1000 - -struct mh_struct { - void *address; - int size; - int line; - char *filen; - char allocated; - struct mh_struct *next; -}; -static struct mh_struct *mh_table[RF_MH_TABLESIZE]; -RF_DECLARE_MUTEX(rf_debug_mem_mutex) - static int mh_table_initialized = 0; - - static void memory_hash_insert(void *addr, int size, int line, char *filen); - static int memory_hash_remove(void *addr, int sz); - -void -rf_record_malloc(p, size, line, filen) - void *p; - int size, line; - char *filen; -{ - RF_ASSERT(size != 0); - - /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ - memory_hash_insert(p, size, line, filen); - tot_mem_in_use += size; - /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ - if ((long) p == rf_memDebugAddress) { - printf("Allocate: debug address allocated from line %d file %s\n", line, filen); - } -} - -void -rf_unrecord_malloc(p, sz) - void *p; - int sz; -{ - int size; - - /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ - size = memory_hash_remove(p, sz); - tot_mem_in_use -= size; - /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ - if ((long) p == rf_memDebugAddress) { - printf("Free: Found debug address\n"); /* this is really only a - * flag line for gdb */ - } -} - -void -rf_print_unfreed() -{ - int i, foundone = 0; - struct mh_struct *p; - - for (i = 0; i < RF_MH_TABLESIZE; i++) { - for (p = mh_table[i]; p; p = p->next) - if (p->allocated) { - if (!foundone) - printf("\n\nThere are unfreed memory locations at program shutdown:\n"); - foundone = 1; - printf("Addr 0x%lx Size %d line %d file %s\n", - (long) p->address, p->size, p->line, p->filen); - } - } - if (tot_mem_in_use) { - printf("%ld total bytes in use\n", tot_mem_in_use); - } -} - -int -rf_ConfigureDebugMem(listp) - RF_ShutdownList_t **listp; -{ - int i, rc; - - rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - if (rf_memDebug) { - for (i = 0; i < RF_MH_TABLESIZE; i++) - mh_table[i] = NULL; - mh_table_initialized = 1; - } - return (0); -} -#define HASHADDR(_a_) ( (((unsigned long) _a_)>>3) % RF_MH_TABLESIZE ) - -static void -memory_hash_insert(addr, size, line, filen) - void *addr; - int size, line; - char *filen; -{ - unsigned long bucket = HASHADDR(addr); - struct mh_struct *p; - - RF_ASSERT(mh_table_initialized); - - /* search for this address in the hash table */ - for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); - if (!p) { - RF_Malloc(p, sizeof(struct mh_struct), (struct mh_struct *)); - RF_ASSERT(p); - p->next = mh_table[bucket]; - mh_table[bucket] = p; - p->address = addr; - p->allocated = 0; - } - if (p->allocated) { - printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n", (long) addr, line, filen); - printf(" last allocated from line %d file %s\n", p->line, p->filen); - RF_ASSERT(0); - } - p->size = size; - p->line = line; - p->filen = filen; - p->allocated = 1; -} - -static int -memory_hash_remove(addr, sz) - void *addr; - int sz; -{ - unsigned long bucket = HASHADDR(addr); - struct mh_struct *p; - - RF_ASSERT(mh_table_initialized); - for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); - if (!p) { - printf("ERROR: freeing never-allocated address 0x%lx\n", (long) addr); - RF_PANIC(); - } - if (!p->allocated) { - printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n", (long) addr, p->line, p->filen); - RF_PANIC(); - } - if (sz > 0 && p->size != sz) { /* you can suppress this error by - * using a negative value as the size - * to free */ - printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. Alloc at line %d of file %s\n", (unsigned long) addr, sz, p->size, p->line, p->filen); - RF_PANIC(); - } - p->allocated = 0; - return (p->size); -} diff --git a/sys/dev/raidframe/rf_debugMem.h b/sys/dev/raidframe/rf_debugMem.h deleted file mode 100644 index e6d8c60..0000000 --- a/sys/dev/raidframe/rf_debugMem.h +++ /dev/null @@ -1,88 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_debugMem.h,v 1.7 1999/09/05 01:58:11 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky, Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_debugMem.h -- memory leak debugging module - * - * IMPORTANT: if you put the lock/unlock mutex stuff back in here, you - * need to take it out of the routines in debugMem.c - * - */ - -#ifndef _RF__RF_DEBUGMEM_H_ -#define _RF__RF_DEBUGMEM_H_ - -#include <dev/raidframe/rf_alloclist.h> - -#ifdef _KERNEL -#include <sys/types.h> -#include <sys/malloc.h> - -#if defined(__FreeBSD__) -MALLOC_DECLARE(M_RAIDFRAME); -#endif - -#define RF_Malloc(_p_, _size_, _cast_) \ - { \ - _p_ = _cast_ malloc((u_long)_size_, M_RAIDFRAME, M_NOWAIT | M_ZERO); \ - if (_p_ == NULL) panic("out of memory\n"); \ - if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \ - } - -#define RF_MallocAndAdd(__p_, __size_, __cast_, __alist_) \ - { \ - RF_Malloc(__p_, __size_, __cast_); \ - if (__alist_) rf_AddToAllocList(__alist_, __p_, __size_); \ - } - -#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) \ - { \ - RF_Malloc( _p_, (_nel_) * (_elsz_), _cast_); \ - } - -#define RF_CallocAndAdd(__p,__nel,__elsz,__cast,__alist) \ - { \ - RF_Calloc(__p, __nel, __elsz, __cast); \ - if (__alist) rf_AddToAllocList(__alist, __p, (__nel)*(__elsz)); \ - } - -#define RF_Free(_p_, _sz_) \ - { \ - free((void *)(_p_), M_RAIDFRAME); \ - if (rf_memDebug) rf_unrecord_malloc(_p_, (u_int32_t) (_sz_)); \ - } - -#endif /* _KERNEL */ - -void rf_record_malloc(void *p, int size, int line, char *filen); -void rf_unrecord_malloc(void *p, int sz); -void rf_print_unfreed(void); -int rf_ConfigureDebugMem(RF_ShutdownList_t ** listp); - -#endif /* !_RF__RF_DEBUGMEM_H_ */ diff --git a/sys/dev/raidframe/rf_debugprint.c b/sys/dev/raidframe/rf_debugprint.c deleted file mode 100644 index 02adee7..0000000 --- a/sys/dev/raidframe/rf_debugprint.c +++ /dev/null @@ -1,136 +0,0 @@ -/* $NetBSD: rf_debugprint.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Code to do debug printfs. Calls to rf_debug_printf cause the corresponding - * information to be printed to a circular buffer rather than the screen. - * The point is to try and minimize the timing variations induced by the - * printfs, and to capture only the printf's immediately preceding a failure. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> - -#include <sys/param.h> - -struct RF_Entry_s { - char *cstring; - void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; -}; -/* space for 1k lines */ -#define BUFSHIFT 10 -#define BUFSIZE (1<<BUFSHIFT) -#define BUFMASK (BUFSIZE-1) - -static struct RF_Entry_s rf_debugprint_buf[BUFSIZE]; -static int rf_debugprint_index = 0; -RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex) - int rf_ConfigureDebugPrint(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rf_clear_debug_print_buffer(); - return (0); -} - -void -rf_clear_debug_print_buffer() -{ - int i; - - for (i = 0; i < BUFSIZE; i++) - rf_debugprint_buf[i].cstring = NULL; - rf_debugprint_index = 0; -} - -void -rf_debug_printf(s, a1, a2, a3, a4, a5, a6, a7, a8) - char *s; - void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; -{ - int idx; - - if (rf_debugPrintUseBuffer) { - - RF_LOCK_MUTEX(rf_debug_print_mutex); - idx = rf_debugprint_index; - rf_debugprint_index = (rf_debugprint_index + 1) & BUFMASK; - RF_UNLOCK_MUTEX(rf_debug_print_mutex); - - rf_debugprint_buf[idx].cstring = s; - rf_debugprint_buf[idx].a1 = a1; - rf_debugprint_buf[idx].a2 = a2; - rf_debugprint_buf[idx].a3 = a3; - rf_debugprint_buf[idx].a4 = a4; - rf_debugprint_buf[idx].a5 = a5; - rf_debugprint_buf[idx].a6 = a6; - rf_debugprint_buf[idx].a7 = a7; - rf_debugprint_buf[idx].a8 = a8; - } else { - printf(s, a1, a2, a3, a4, a5, a6, a7, a8); - } -} - -void -rf_print_debug_buffer() -{ - rf_spill_debug_buffer(NULL); -} - -void -rf_spill_debug_buffer(fname) - char *fname; -{ - int i; - - if (!rf_debugPrintUseBuffer) - return; - - RF_LOCK_MUTEX(rf_debug_print_mutex); - - for (i = rf_debugprint_index + 1; i != rf_debugprint_index; i = (i + 1) & BUFMASK) - if (rf_debugprint_buf[i].cstring) - printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); - printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); - RF_UNLOCK_MUTEX(rf_debug_print_mutex); -} diff --git a/sys/dev/raidframe/rf_debugprint.h b/sys/dev/raidframe/rf_debugprint.h deleted file mode 100644 index 318f620..0000000 --- a/sys/dev/raidframe/rf_debugprint.h +++ /dev/null @@ -1,44 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_debugprint.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ -/* - * rf_debugprint.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DEBUGPRINT_H_ -#define _RF__RF_DEBUGPRINT_H_ - -int rf_ConfigureDebugPrint(RF_ShutdownList_t ** listp); -void rf_clear_debug_print_buffer(void); -void -rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4, - void *a5, void *a6, void *a7, void *a8); -void rf_print_debug_buffer(void); -void rf_spill_debug_buffer(char *fname); - -#endif /* !_RF__RF_DEBUGPRINT_H_ */ diff --git a/sys/dev/raidframe/rf_decluster.c b/sys/dev/raidframe/rf_decluster.c deleted file mode 100644 index 646a5ad..0000000 --- a/sys/dev/raidframe/rf_decluster.c +++ /dev/null @@ -1,747 +0,0 @@ -/* $NetBSD: rf_decluster.c,v 1.6 2001/01/26 04:40:03 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*---------------------------------------------------------------------- - * - * rf_decluster.c -- code related to the declustered layout - * - * Created 10-21-92 (MCH) - * - * Nov 93: adding support for distributed sparing. This code is a little - * complex: the basic layout used is as follows: - * let F = (v-1)/GCD(r,v-1). The spare space for each set of - * F consecutive fulltables is grouped together and placed after - * that set of tables. - * +------------------------------+ - * | F fulltables | - * | Spare Space | - * | F fulltables | - * | Spare Space | - * | ... | - * +------------------------------+ - * - *--------------------------------------------------------------------*/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raidframe.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_shutdown.h> - - -extern int rf_copyback_in_progress; /* debug only */ - -/* found in rf_kintf.c */ -extern int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); - -#if (RF_INCLUDE_PARITY_DECLUSTERING > 0) || (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) - -/* configuration code */ - -int -rf_ConfigureDeclustered( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j; - RF_RowCol_t *first_avail_slot; - RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk; - RF_DeclusteredConfigInfo_t *info; - RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, - extraPUsPerDisk; - RF_StripeCount_t totSparePUsPerDisk; - RF_SectorNum_t diskOffsetOfLastFullTableInSUs; - RF_SectorCount_t SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); - RF_StripeNum_t l, SUID; - - SUID = l = 0; - numCompleteSpareRegionsPerDisk = 0; - - /* 1. create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - info->SpareTable = NULL; - - /* 2. extract parameters from the config structure */ - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { - (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN); - } - cfgBuf += RF_SPAREMAP_NAME_LEN; - - b = *((int *) cfgBuf); - cfgBuf += sizeof(int); - v = *((int *) cfgBuf); - cfgBuf += sizeof(int); - k = *((int *) cfgBuf); - cfgBuf += sizeof(int); - r = *((int *) cfgBuf); - cfgBuf += sizeof(int); - lambda = *((int *) cfgBuf); - cfgBuf += sizeof(int); - raidPtr->noRotate = *((int *) cfgBuf); - cfgBuf += sizeof(int); - - /* the sparemaps are generated assuming that parity is rotated, so we - * issue a warning if both distributed sparing and no-rotate are on at - * the same time */ - if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); - } - if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); - return (EINVAL); - } - /* 3. set up the values used in the mapping code */ - info->BlocksPerTable = b; - info->Lambda = lambda; - info->NumParityReps = info->groupSize = k; - info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ - info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ - info->PUsPerBlock = k - 1; - info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; - info->TableDepthInPUs = (b * k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ - - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ - info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; - - /* check to make sure the block design is sufficiently small */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", - (int) info->FullTableDepthInPUs, - (int) info->SpareSpaceDepthPerRegionInSUs, - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", - (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), \ - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } - - - /* compute the size of each disk, and the number of tables in the last - * fulltable (which need not be complete) */ - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; - - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; - info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; - extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - - /* assume conservatively that we need the full amount of spare - * space in one region in order to provide spares for the - * partial spare region at the end of the array. We set "i" - * to the number of tables in the partial spare region. This - * may actually include some fulltables. */ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - if (extraPUsPerDisk <= 0) - i = 0; - else - i = extraPUsPerDisk / info->TableDepthInPUs; - - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = i % k; - - /* note that in the last spare region, the spare space is - * complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - info->TotSparePUsPerDisk = totSparePUsPerDisk; - - layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ - info->ExtraTablesPerDisk * info->TableDepthInPUs + - totSparePUsPerDisk /* spare space */ - ) * layoutPtr->SUsPerPU; - layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) - * layoutPtr->SUsPerPU * (k - 1) / k; - - } else { - /* non-dist spare case: force each disk to contain an - * integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the number of tables in the last fulltable, which - * need not be complete */ - complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; - - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; - } - - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - /* find the disk offset of the stripe unit where the last fulltable - * starts */ - numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; - diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; - info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - } - info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; - info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - - /* 4. create and initialize the lookup tables */ - info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->LayoutTable == NULL) - return (ENOMEM); - info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->OffsetTable == NULL) - return (ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); - if (info->BlockTable == NULL) - return (ENOMEM); - - first_avail_slot = rf_make_1d_array(v, NULL); - if (first_avail_slot == NULL) - return (ENOMEM); - - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) - info->LayoutTable[i][j] = *cfgBuf++; - - /* initialize offset table */ - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) { - info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; - first_avail_slot[info->LayoutTable[i][j]]++; - } - - /* initialize block table */ - for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { - for (i = 0; i < b; i++) { - for (j = 0; j < k; j++) { - info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] - [info->LayoutTable[i][j]] = SUID; - } - SUID++; - } - } - - rf_free_1d_array(first_avail_slot, v); - - /* 5. set up the remaining redundant-but-useful parameters */ - - raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 1); - - /* strange evaluation order below to try and minimize overflow - * problems */ - - layoutPtr->dataSectorsPerStripe = (k - 1) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = k - 1; - layoutPtr->numParityCol = 1; - - return (0); -} -/* declustering with distributed sparing */ -static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t); -static void -rf_ShutdownDeclusteredDS(arg) - RF_ThreadArg_t arg; -{ - RF_DeclusteredConfigInfo_t *info; - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - if (info->SpareTable) - rf_FreeSpareTable(raidPtr); -} - -int -rf_ConfigureDeclusteredDS( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr); - if (rc) - return (rc); - rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr); - if (rc) { - RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc); - rf_ShutdownDeclusteredDS(raidPtr); - return (rc); - } - return (0); -} - -void -rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidSector; - RF_RowCol_t *row; - RF_RowCol_t *col; - RF_SectorNum_t *diskSector; - int remap; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - if (raidPtr->numRow == 1) - *row = 0; /* avoid a mod and a div in the common case */ - else { - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on - * this disk */ - } - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - RepIndex = info->PUsPerBlock - TableID; - if (!raidPtr->noRotate) - BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0); - *col = info->LayoutTable[BlockID][BlockOffset]; - - /* remap to distributed spare space if indicated */ - if (remap) { - RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); - } else { - - outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ - } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within - * a PU */ - - /* convert SUs to sectors, and, if not aligned to SU boundary, add in - * offset to sector. */ - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); - - RF_ASSERT(*col != -1); -} - - -/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */ -void -rf_MapParityDeclustered( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - if (raidPtr->numRow == 1) - *row = 0; /* avoid a mod and a div in the common case */ - else { - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on - * this disk */ - } - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - /* TableOffset = FullTableOffset % info->SUsPerTable; */ - /* BlockID = (TableOffset / info->PUsPerBlock) % - * info->BlocksPerTable; */ - BlockID = TableOffset / info->PUsPerBlock; - /* BlockOffset = TableOffset % info->PUsPerBlock; */ - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID; - *col = info->LayoutTable[BlockID][RepIndex]; - - if (remap) { - RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); - } else { - - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; - } - - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); - - RF_ASSERT(*col != -1); -} -/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. - * the caller must _never_ attempt to modify this array. - */ -void -rf_IdentifyStripeDeclustered( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0; - RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); - RF_StripeNum_t stripeID, FullTableID; - int tableOffset; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset - * into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into - * block design table */ - *diskids = info->LayoutTable[tableOffset]; -} -/* This returns the default head-separation limit, which is measured - * in "required units for reconstruction". Each time a disk fetches - * a unit, it bumps a counter. The head-sep code prohibits any disk - * from getting more than headSepLimit counter values ahead of any - * other. - * - * We assume here that the number of floating recon buffers is already - * set. There are r stripes to be reconstructed in each table, and so - * if we have a total of B buffers, we can have at most B/r tables - * under recon at any one time. In each table, lambda units are required - * from each disk, so given B buffers, the head sep limit has to be - * (lambda*B)/r units. We subtract one to avoid weird boundary cases. - * - * for example, suppose were given 50 buffers, r=19, and lambda=4 as in - * the 20.5 design. There are 19 stripes/table to be reconstructed, so - * we can have 50/19 tables concurrently under reconstruction, which means - * we can allow the fastest disk to get 50/19 tables ahead of the slower - * disk. There are lambda "required units" for each disk, so the fastest - * disk can get 4*50/19 = 10 counter values ahead of the slowest. - * - * If numBufsToAccumulate is not 1, we need to limit the head sep further - * because multiple bufs will be required for each stripe under recon. - */ -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitDeclustered( - RF_Raid_t * raidPtr) -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - return (info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate); -} -/* returns the default number of recon buffers to use. The value - * is somewhat arbitrary...it's intended to be large enough to allow - * for a reasonably large head-sep limit, but small enough that you - * don't use up all your system memory with buffers. - */ -int -rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr) -{ - return (100 * rf_numBufsToAccumulate); -} -/* sectors in the last fulltable of the array need to be handled - * specially since this fulltable can be incomplete. this function - * changes the values of certain params to handle this. - * - * the idea here is that MapSector et. al. figure out which disk the - * addressed unit lives on by computing the modulos of the unit number - * with the number of units per fulltable, table, etc. In the last - * fulltable, there are fewer units per fulltable, so we need to adjust - * the number of user data units per fulltable to reflect this. - * - * so, we (1) convert the fulltable size and depth parameters to - * the size of the partial fulltable at the end, (2) compute the - * disk sector offset where this fulltable starts, and (3) convert - * the users stripe unit number from an offset into the array to - * an offset into the last fulltable. - */ -void -rf_decluster_adjust_params( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t * SUID, - RF_StripeCount_t * sus_per_fulltable, - RF_StripeCount_t * fulltable_depth, - RF_StripeNum_t * base_suid) -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - - if (*SUID >= info->FullTableLimitSUID) { - /* new full table size is size of last full table on disk */ - *sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable; - - /* new full table depth is corresponding depth */ - *fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - - /* set up the new base offset */ - *base_suid = info->DiskOffsetOfLastFullTableInSUs; - - /* convert users array address to an offset into the last - * fulltable */ - *SUID -= info->FullTableLimitSUID; - } -} -/* - * map a stripe ID to a parity stripe ID. - * See comment above RaidAddressToParityStripeID in layout.c. - */ -void -rf_MapSIDToPSIDDeclustered( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - RF_DeclusteredConfigInfo_t *info; - - info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - - *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) - * info->BlocksPerTable + (stripeID % info->BlocksPerTable); - *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) - / info->BlocksPerTable; - RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU); -} -/* - * Called from MapSector and MapParity to retarget an access at the spare unit. - * Modifies the "col" and "outSU" parameters only. - */ -void -rf_remap_to_spare_space( - RF_RaidLayout_t * layoutPtr, - RF_DeclusteredConfigInfo_t * info, - RF_RowCol_t row, - RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, - RF_SectorNum_t BlockID, - RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, - RF_RowCol_t * outCol, - RF_StripeNum_t * outSU) -{ - RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, - which_ft; - - /* - * note that FullTableID and hence SpareRegion may have gotten - * tweaked by rf_decluster_adjust_params. We detect this by - * noticing that base_suid is not 0. - */ - if (base_suid == 0) { - ftID = FullTableID; - } else { - /* - * There may be > 1.0 full tables in the last (i.e. partial) - * spare region. find out which of these we're in. - */ - lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs; - which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the actual full table ID */ - ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft; - SpareRegion = info->NumCompleteSRs; - } - TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion; - - *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk; - RF_ASSERT(*outCol != -1); - - spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ? - info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU : - (SpareRegion + 1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs; - *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs; - if (*outSU >= layoutPtr->stripeUnitsPerDisk) { - printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n", (long) *outSU); - } -} - -#endif /* (RF_INCLUDE_PARITY_DECLUSTERING > 0) || (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) */ - - -int -rf_InstallSpareTable( - RF_Raid_t * raidPtr, - RF_RowCol_t frow, - RF_RowCol_t fcol) -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_SparetWait_t *req; - int retcode; - - RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *)); - req->C = raidPtr->numCol; - req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; - req->fcol = fcol; - req->SUsPerPU = raidPtr->Layout.SUsPerPU; - req->TablesPerSpareRegion = info->TablesPerSpareRegion; - req->BlocksPerTable = info->BlocksPerTable; - req->TableDepthInPUs = info->TableDepthInPUs; - req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs; - - retcode = rf_GetSpareTableFromDaemon(req); - RF_ASSERT(!retcode); /* XXX -- fix this to recover gracefully -- - * XXX */ - return (retcode); -} -/* - * Invoked via ioctl to install a spare table in the kernel. - */ -int -rf_SetSpareTable(raidPtr, data) - RF_Raid_t *raidPtr; - void *data; -{ - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_SpareTableEntry_t **ptrs; - int i, retcode; - - /* what we need to copyin is a 2-d array, so first copyin the user - * pointers to the rows in the table */ - RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - - if (retcode) - return (retcode); - - /* now allocate kernel space for the row pointers */ - RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - - /* now allocate kernel space for each row in the table, and copy it in - * from user space */ - for (i = 0; i < info->TablesPerSpareRegion; i++) { - RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *)); - retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); - if (retcode) { - info->SpareTable = NULL; /* blow off the memory - * we've allocated */ - return (retcode); - } - } - - /* free up the temporary array we used */ - RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - - return (0); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsDeclustered(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - - return (((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk); -} - -void -rf_FreeSpareTable(raidPtr) - RF_Raid_t *raidPtr; -{ - long i; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_SpareTableEntry_t **table = info->SpareTable; - - for (i = 0; i < info->TablesPerSpareRegion; i++) { - RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); - } - RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - info->SpareTable = (RF_SpareTableEntry_t **) NULL; -} diff --git a/sys/dev/raidframe/rf_decluster.h b/sys/dev/raidframe/rf_decluster.h deleted file mode 100644 index a630298..0000000 --- a/sys/dev/raidframe/rf_decluster.h +++ /dev/null @@ -1,141 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_decluster.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*---------------------------------------------------------------------- - * - * decluster.h -- header file for declustered layout code - * - * Adapted from raidSim version July 1994 - * Created 10-21-92 (MCH) - * - *--------------------------------------------------------------------*/ - -#ifndef _RF__RF_DECLUSTER_H_ -#define _RF__RF_DECLUSTER_H_ - -#include <dev/raidframe/rf_types.h> - -/* - * These structures define the tables used to locate the spare unit - * associated with a particular data or parity unit, and to perform - * the associated inverse mapping. - */ -struct RF_SpareTableEntry_s { - u_int spareDisk; /* disk to which this block is spared */ - u_int spareBlockOffsetInSUs; /* offset into spare table for that - * disk */ -}; -#define RF_SPAREMAP_NAME_LEN 128 - -/* this is the layout-specific info structure for the declustered layout. - */ -struct RF_DeclusteredConfigInfo_s { - RF_StripeCount_t groupSize; /* no. of stripe units per parity - * stripe */ - RF_RowCol_t **LayoutTable; /* the block design table */ - RF_RowCol_t **OffsetTable; /* the sector offset table */ - RF_RowCol_t **BlockTable; /* the block membership table */ - RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */ - RF_StripeCount_t SUsPerTable; /* stripe units per table */ - RF_StripeCount_t PUsPerBlock; /* parity units per block */ - RF_StripeCount_t SUsPerBlock; /* stripe units per block */ - RF_StripeCount_t BlocksPerTable; /* block design tuples per - * table */ - RF_StripeCount_t NumParityReps; /* tables per full table */ - RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */ - RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1 - * fulltable */ - RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables - * start */ - RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last - * fulltable */ - RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial - * ft, if any */ - RF_StripeCount_t numCompleteFullTablesPerDisk; /* ft identifier of - * partial ft, if any */ - u_int Lambda; /* the pair count in the block design */ - - /* these are used only in the distributed-sparing case */ - RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising - * 1 spare region */ - RF_StripeCount_t TablesPerSpareRegion; /* # of tables */ - RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare - * space/disk/region */ - RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */ - RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space - * after partial ft */ - RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs - * per disk */ - RF_StripeCount_t NumCompleteSRs; - RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */ - char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find - * sparemap. not used in - * kernel */ -}; - -int -rf_ConfigureDeclustered(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_ConfigureDeclusteredDS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); - -void -rf_MapSectorDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -int rf_InstallSpareTable(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); -void rf_FreeSpareTable(RF_Raid_t * raidPtr); - -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t * raidPtr); -int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr); - -void -rf_decluster_adjust_params(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t * SUID, RF_StripeCount_t * sus_per_fulltable, - RF_StripeCount_t * fulltable_depth, RF_StripeNum_t * base_suid); -void -rf_remap_to_spare_space( - RF_RaidLayout_t * layoutPtr, - RF_DeclusteredConfigInfo_t * info, RF_RowCol_t row, RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, RF_RowCol_t * outCol, RF_StripeNum_t * outSU); -int rf_SetSpareTable(RF_Raid_t * raidPtr, void *data); -RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t * raidPtr); - -#endif /* !_RF__RF_DECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_declusterPQ.c b/sys/dev/raidframe/rf_declusterPQ.c deleted file mode 100644 index dc539a3..0000000 --- a/sys/dev/raidframe/rf_declusterPQ.c +++ /dev/null @@ -1,493 +0,0 @@ -/* $NetBSD: rf_declusterPQ.c,v 1.5 2001/01/26 14:06:17 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Daniel Stodolsky, Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*-------------------------------------------------- - * rf_declusterPQ.c - * - * mapping code for declustered P & Q or declustered EvenOdd - * much code borrowed from rf_decluster.c - * - *--------------------------------------------------*/ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_declusterPQ.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> - -#if (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) || (RF_INCLUDE_EVENODD > 0) -/* configuration code */ - -int -rf_ConfigureDeclusteredPQ( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j, l; - int *first_avail_slot; - int complete_FT_count, SUID; - RF_DeclusteredConfigInfo_t *info; - int numCompleteFullTablesPerDisk; - int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0, - extraPUsPerDisk; - int totSparePUsPerDisk; - int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); - - cfgBuf += RF_SPAREMAP_NAME_LEN; - - b = *((int *) cfgBuf); - cfgBuf += sizeof(int); - v = *((int *) cfgBuf); - cfgBuf += sizeof(int); - k = *((int *) cfgBuf); - cfgBuf += sizeof(int); - r = *((int *) cfgBuf); - cfgBuf += sizeof(int); - lambda = *((int *) cfgBuf); - cfgBuf += sizeof(int); - raidPtr->noRotate = *((int *) cfgBuf); - cfgBuf += sizeof(int); - - if (k <= 2) { - printf("RAIDFRAME: k=%d, minimum value 2\n", k); - return (EINVAL); - } - /* 1. create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* the sparemaps are generated assuming that parity is rotated, so we - * issue a warning if both distributed sparing and no-rotate are on at - * the same time */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); - } - if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); - return (EINVAL); - } - /* 3. set up the values used in devRaidMap */ - info->BlocksPerTable = b; - info->NumParityReps = info->groupSize = k; - info->PUsPerBlock = k - 2; /* PQ */ - info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ - info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ - info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; - info->TableDepthInPUs = (b * k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ - - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ - info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; - - /* check to make sure the block design is sufficiently small */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", - (int) info->FullTableDepthInPUs, - (int) info->SpareSpaceDepthPerRegionInSUs, - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", - (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), - (int) layoutPtr->stripeUnitsPerDisk); - return (EINVAL); - } - } - - - /* compute the size of each disk, and the number of tables in the last - * fulltable (which need not be complete) */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; - - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; - info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; - extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - - /* assume conservatively that we need the full amount of spare - * space in one region in order to provide spares for the - * partial spare region at the end of the array. We set "i" - * to the number of tables in the partial spare region. This - * may actually include some fulltables. */ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - if (extraPUsPerDisk <= 0) - i = 0; - else - i = extraPUsPerDisk / info->TableDepthInPUs; - - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = i % k; - - /* note that in the last spare region, the spare space is - * complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - info->TotSparePUsPerDisk = totSparePUsPerDisk; - - layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ - info->ExtraTablesPerDisk * info->TableDepthInPUs + - totSparePUsPerDisk /* spare space */ - ) * layoutPtr->SUsPerPU; - layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) - * layoutPtr->SUsPerPU * (k - 1) / k; - - } else { - /* non-dist spare case: force each disk to contain an - * integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the number of tables in the last fulltable, which - * need not be complete */ - complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; - - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; - } - - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - /* find the disk offset of the stripe unit where the last fulltable - * starts */ - numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; - diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; - info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - } - info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; - info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - - /* 4. create and initialize the lookup tables */ - info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->LayoutTable == NULL) - return (ENOMEM); - info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->OffsetTable == NULL) - return (ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); - if (info->BlockTable == NULL) - return (ENOMEM); - - first_avail_slot = (int *) rf_make_1d_array(v, NULL); - if (first_avail_slot == NULL) - return (ENOMEM); - - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) - info->LayoutTable[i][j] = *cfgBuf++; - - /* initialize offset table */ - for (i = 0; i < b; i++) - for (j = 0; j < k; j++) { - info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; - first_avail_slot[info->LayoutTable[i][j]]++; - } - - /* initialize block table */ - for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { - for (i = 0; i < b; i++) { - for (j = 0; j < k; j++) { - info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] - [info->LayoutTable[i][j]] = SUID; - } - SUID++; - } - } - - rf_free_1d_array(first_avail_slot, v); - - /* 5. set up the remaining redundant-but-useful parameters */ - - raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 2); - - /* strange evaluation order below to try and minimize overflow - * problems */ - - layoutPtr->dataSectorsPerStripe = (k - 2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = k - 2; - layoutPtr->numParityCol = 2; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr) -{ - int def_decl; - - def_decl = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr); - return (RF_MAX(3 * raidPtr->numCol, def_decl)); -} - -void -rf_MapSectorDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - RF_ASSERT(BlockOffset < info->groupSize - 2); - /* - TableIDs go from 0 .. GroupSize-1 inclusive. - PUsPerBlock is k-2. - We want the tableIDs to rotate from the - right, so use GroupSize - */ - RepIndex = info->groupSize - 1 - TableID; - RF_ASSERT(RepIndex >= 0); - if (!raidPtr->noRotate) { - if (TableID == 0) - BlockOffset++; /* P on last drive, Q on first */ - else - BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0); /* skip over PQ */ - RF_ASSERT(BlockOffset < info->groupSize); - *col = info->LayoutTable[BlockID][BlockOffset]; - } - /* remap to distributed spare space if indicated */ - if (remap) { - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); - } else { - - outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ - } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within - * a PU */ - - /* convert SUs to sectors, and, if not aligned to SU boundary, add in - * offset to sector */ - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); -} - - -void -rf_MapParityDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; - *col = info->LayoutTable[BlockID][RepIndex]; - - if (remap) - RF_PANIC(); - - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); -} - -void -rf_MapQDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex, RepIndexQ; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the q block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; - RepIndexQ = ((RepIndex == (info->groupSize - 1)) ? 0 : RepIndex + 1); - *col = info->LayoutTable[BlockID][RepIndexQ]; - - if (remap) - RF_PANIC(); - - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - - outSU += info->OffsetTable[BlockID][RepIndexQ] * layoutPtr->SUsPerPU; - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); -} -/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. - * the caller must _never_ attempt to modify this array. - */ -void -rf_IdentifyStripeDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0; - RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); - RF_StripeNum_t stripeID, FullTableID; - int tableOffset; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ - *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset - * into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into - * block design table */ - *diskids = info->LayoutTable[tableOffset]; -} -#endif /* (RF_INCLUDE_PARITY_DECLUSTERING_PQ > 0) || (RF_INCLUDE_EVENODD > 0) */ diff --git a/sys/dev/raidframe/rf_declusterPQ.h b/sys/dev/raidframe/rf_declusterPQ.h deleted file mode 100644 index 6edef0b..0000000 --- a/sys/dev/raidframe/rf_declusterPQ.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_declusterPQ.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky, Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DECLUSTERPQ_H_ -#define _RF__RF_DECLUSTERPQ_H_ - -#include <dev/raidframe/rf_types.h> - -int -rf_ConfigureDeclusteredPQ(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr); -void -rf_MapSectorDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapQDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); - -#endif /* !_RF__RF_DECLUSTERPQ_H_ */ diff --git a/sys/dev/raidframe/rf_desc.h b/sys/dev/raidframe/rf_desc.h deleted file mode 100644 index 8a6951b..0000000 --- a/sys/dev/raidframe/rf_desc.h +++ /dev/null @@ -1,113 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_desc.h,v 1.5 2000/01/09 00:00:18 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DESC_H_ -#define _RF__RF_DESC_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_dag.h> - -struct RF_RaidReconDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_RowCol_t row; /* row of failed disk */ - RF_RowCol_t col; /* col of failed disk */ - int state; /* how far along the reconstruction operation - * has gotten */ - RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon - * (not used in dist sparing) */ - int numDisksDone; /* the number of surviving disks that have - * completed their work */ - RF_RowCol_t srow; /* row ID of the spare disk (not used in dist - * sparing) */ - RF_RowCol_t scol; /* col ID of the spare disk (not used in dist - * sparing) */ - /* - * Prevent recon from hogging CPU - */ - RF_Etimer_t recon_exec_timer; - RF_uint64 reconExecTimerRunning; - RF_uint64 reconExecTicks; - RF_uint64 maxReconExecTicks; - -#if RF_RECON_STATS > 0 - RF_uint64 hsStallCount; /* head sep stall count */ - RF_uint64 numReconExecDelays; - RF_uint64 numReconEventWaits; -#endif /* RF_RECON_STATS > 0 */ - RF_RaidReconDesc_t *next; -}; - -struct RF_RaidAccessDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_IoType_t type; /* read or write */ - RF_RaidAddr_t raidAddress; /* starting address in raid address - * space */ - RF_SectorCount_t numBlocks; /* number of blocks (sectors) to - * transfer */ - RF_StripeCount_t numStripes; /* number of stripes involved in - * access */ - caddr_t bufPtr; /* pointer to data buffer */ - RF_RaidAccessFlags_t flags; /* flags controlling operation */ - int state; /* index into states telling how far along the - * RAID operation has gotten */ - RF_AccessState_t *states; /* array of states to be run */ - int status; /* pass/fail status of the last operation */ - RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */ - RF_AccessStripeMapHeader_t *asmap; /* the asm for this I/O */ - void *bp; /* buf pointer for this RAID acc. ignored - * outside the kernel */ - RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to - * the caller after I/O completion */ - RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be - * returned to the caller - * after I/O completion */ - RF_AccTraceEntry_t tracerec; /* perf monitoring information for a - * user access (not for dag stats) */ - void (*callbackFunc) (RF_CBParam_t); /* callback function for this - * I/O */ - void *callbackArg; /* arg to give to callback func */ - - RF_AllocListElem_t *cleanupList; /* memory to be freed at the - * end of the access */ - - RF_RaidAccessDesc_t *next; - RF_RaidAccessDesc_t *head; - - int numPending; - - RF_DECLARE_MUTEX(mutex) /* these are used to implement - * blocking I/O */ - RF_DECLARE_COND(cond) - int async_flag; - - RF_Etimer_t timer; /* used for timing this access */ -}; -#endif /* !_RF__RF_DESC_H_ */ diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c deleted file mode 100644 index c03e6cd..0000000 --- a/sys/dev/raidframe/rf_diskqueue.c +++ /dev/null @@ -1,593 +0,0 @@ -/* $NetBSD: rf_diskqueue.c,v 1.13 2000/03/04 04:22:34 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * - * rf_diskqueue.c -- higher-level disk queue code - * - * the routines here are a generic wrapper around the actual queueing - * routines. The code here implements thread scheduling, synchronization, - * and locking ops (see below) on top of the lower-level queueing code. - * - * to support atomic RMW, we implement "locking operations". When a - * locking op is dispatched to the lower levels of the driver, the - * queue is locked, and no further I/Os are dispatched until the queue - * receives & completes a corresponding "unlocking operation". This - * code relies on the higher layers to guarantee that a locking op - * will always be eventually followed by an unlocking op. The model - * is that the higher layers are structured so locking and unlocking - * ops occur in pairs, i.e. an unlocking op cannot be generated until - * after a locking op reports completion. There is no good way to - * check to see that an unlocking op "corresponds" to the op that - * currently has the queue locked, so we make no such attempt. Since - * by definition there can be only one locking op outstanding on a - * disk, this should not be a problem. - * - * In the kernel, we allow multiple I/Os to be concurrently dispatched - * to the disk driver. In order to support locking ops in this - * environment, when we decide to do a locking op, we stop dispatching - * new I/Os and wait until all dispatched I/Os have completed before - * dispatching the locking op. - * - * Unfortunately, the code is different in the 3 different operating - * states (user level, kernel, simulator). In the kernel, I/O is - * non-blocking, and we have no disk threads to dispatch for us. - * Therefore, we have to dispatch new I/Os to the scsi driver at the - * time of enqueue, and also at the time of completion. At user - * level, I/O is blocking, and so only the disk threads may dispatch - * I/Os. Thus at user level, all we can do at enqueue time is enqueue - * and wake up the disk thread to do the dispatch. - * - ****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_cvscan.h> -#include <dev/raidframe/rf_sstf.h> -#include <dev/raidframe/rf_fifo.h> -#include <dev/raidframe/rf_kintf.h> - -static int init_dqd(RF_DiskQueueData_t *); -static void clean_dqd(RF_DiskQueueData_t *); -static void rf_ShutdownDiskQueueSystem(void *); - -#define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) - -/***************************************************************************** - * - * the disk queue switch defines all the functions used in the - * different queueing disciplines queue ID, init routine, enqueue - * routine, dequeue routine - * - ****************************************************************************/ - -static RF_DiskQueueSW_t diskqueuesw[] = { - {"fifo", /* FIFO */ - rf_FifoCreate, - rf_FifoEnqueue, - rf_FifoDequeue, - rf_FifoPeek, - rf_FifoPromote}, - - {"cvscan", /* cvscan */ - rf_CvscanCreate, - rf_CvscanEnqueue, - rf_CvscanDequeue, - rf_CvscanPeek, - rf_CvscanPromote}, - - {"sstf", /* shortest seek time first */ - rf_SstfCreate, - rf_SstfEnqueue, - rf_SstfDequeue, - rf_SstfPeek, - rf_SstfPromote}, - - {"scan", /* SCAN (two-way elevator) */ - rf_ScanCreate, - rf_SstfEnqueue, - rf_ScanDequeue, - rf_ScanPeek, - rf_SstfPromote}, - - {"cscan", /* CSCAN (one-way elevator) */ - rf_CscanCreate, - rf_SstfEnqueue, - rf_CscanDequeue, - rf_CscanPeek, - rf_SstfPromote}, - -}; -#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) - -static RF_FreeList_t *rf_dqd_freelist; - -#define RF_MAX_FREE_DQD 256 -#define RF_DQD_INC 16 -#define RF_DQD_INITIAL 64 - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif - -#include <sys/buf.h> - -static int -init_dqd(dqd) - RF_DiskQueueData_t *dqd; -{ - - dqd->bp = (RF_Buf_t) malloc(sizeof(*dqd->bp), M_RAIDFRAME, M_NOWAIT); - if (dqd->bp == NULL) { - return (ENOMEM); - } - memset(dqd->bp, 0, sizeof(*dqd->bp)); /* if you don't do it, nobody - * else will.. */ - return (0); -} - -static void -clean_dqd(dqd) - RF_DiskQueueData_t *dqd; -{ - free(dqd->bp, M_RAIDFRAME); -} -/* configures a single disk queue */ - -int -rf_ConfigureDiskQueue( - RF_Raid_t * raidPtr, - RF_DiskQueue_t * diskqueue, - RF_RowCol_t r, /* row & col -- debug only. BZZT not any - * more... */ - RF_RowCol_t c, - RF_DiskQueueSW_t * p, - RF_SectorCount_t sectPerDisk, - dev_t dev, - int maxOutstanding, - RF_ShutdownList_t ** listp, - RF_AllocListElem_t * clList) -{ - int rc; - - diskqueue->row = r; - diskqueue->col = c; - diskqueue->qPtr = p; - diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp); - diskqueue->dev = dev; - diskqueue->numOutstanding = 0; - diskqueue->queueLength = 0; - diskqueue->maxOutstanding = maxOutstanding; - diskqueue->curPriority = RF_IO_NORMAL_PRIORITY; - diskqueue->nextLockingOp = NULL; - diskqueue->unlockingOp = NULL; - diskqueue->numWaiting = 0; - diskqueue->flags = 0; - diskqueue->raidPtr = raidPtr; - diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c]; - rc = rf_create_managed_mutex(listp, &diskqueue->mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rc = rf_create_managed_cond(listp, &diskqueue->cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - return (0); -} - -static void -rf_ShutdownDiskQueueSystem(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist, next, (RF_DiskQueueData_t *), clean_dqd); -} - -int -rf_ConfigureDiskQueueSystem(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD, - RF_DQD_INC, sizeof(RF_DiskQueueData_t)); - if (rf_dqd_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownDiskQueueSystem(NULL); - return (rc); - } - RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL, next, - (RF_DiskQueueData_t *), init_dqd); - return (0); -} - -int -rf_ConfigureDiskQueues( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_DiskQueue_t **diskQueues, *spareQueues; - RF_DiskQueueSW_t *p; - RF_RowCol_t r, c; - int rc, i; - - raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs; - - for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) { - if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) { - p = &diskqueuesw[i]; - break; - } - } - if (p == NULL) { - RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType); - p = &diskqueuesw[0]; - } - raidPtr->qType = p; - RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList); - if (diskQueues == NULL) { - return (ENOMEM); - } - raidPtr->Queues = diskQueues; - for (r = 0; r < raidPtr->numRow; r++) { - RF_CallocAndAdd(diskQueues[r], raidPtr->numCol + - ((r == 0) ? RF_MAXSPARE : 0), - sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), - raidPtr->cleanupList); - if (diskQueues[r] == NULL) - return (ENOMEM); - for (c = 0; c < raidPtr->numCol; c++) { - rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[r][c], - r, c, p, - raidPtr->sectorsPerDisk, - raidPtr->Disks[r][c].dev, - cfgPtr->maxOutstandingDiskReqs, - listp, raidPtr->cleanupList); - if (rc) - return (rc); - } - } - - spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; - for (r = 0; r < raidPtr->numSpare; r++) { - rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r], - 0, raidPtr->numCol + r, p, - raidPtr->sectorsPerDisk, - raidPtr->Disks[0][raidPtr->numCol + r].dev, - cfgPtr->maxOutstandingDiskReqs, listp, - raidPtr->cleanupList); - if (rc) - return (rc); - } - return (0); -} -/* Enqueue a disk I/O - * - * Unfortunately, we have to do things differently in the different - * environments (simulator, user-level, kernel). - * At user level, all I/O is blocking, so we have 1 or more threads/disk - * and the thread that enqueues is different from the thread that dequeues. - * In the kernel, I/O is non-blocking and so we'd like to have multiple - * I/Os outstanding on the physical disks when possible. - * - * when any request arrives at a queue, we have two choices: - * dispatch it to the lower levels - * queue it up - * - * kernel rules for when to do what: - * locking request: queue empty => dispatch and lock queue, - * else queue it - * unlocking req : always dispatch it - * normal req : queue empty => dispatch it & set priority - * queue not full & priority is ok => dispatch it - * else queue it - * - * user-level rules: - * always enqueue. In the special case of an unlocking op, enqueue - * in a special way that will cause the unlocking op to be the next - * thing dequeued. - * - * simulator rules: - * Do the same as at user level, with the sleeps and wakeups suppressed. - */ -void -rf_DiskIOEnqueue(queue, req, pri) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int pri; -{ - RF_ETIMER_START(req->qtime); - RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector); - req->priority = pri; - - if (rf_queueDebug && (req->numSector == 0)) { - printf("Warning: Enqueueing zero-sector access\n"); - } - /* - * kernel - */ - RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); - /* locking request */ - if (RF_LOCKING_REQ(req)) { - if (RF_QUEUE_EMPTY(queue)) { - Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n", pri, queue->row, queue->col); - RF_LOCK_QUEUE(queue); - rf_DispatchKernelIO(queue, req); - } else { - queue->queueLength++; /* increment count of number - * of requests waiting in this - * queue */ - Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n", pri, queue->row, queue->col); - req->queue = (void *) queue; - (queue->qPtr->Enqueue) (queue->qHdr, req, pri); - } - } - /* unlocking request */ - else - if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock - * when this I/O completes */ - Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n", pri, queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue)); - rf_DispatchKernelIO(queue, req); - } - /* normal request */ - else - if (RF_OK_TO_DISPATCH(queue, req)) { - Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n", pri, queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - } else { - queue->queueLength++; /* increment count of - * number of requests - * waiting in this queue */ - Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n", pri, queue->row, queue->col); - req->queue = (void *) queue; - (queue->qPtr->Enqueue) (queue->qHdr, req, pri); - } - RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); -} - - -/* get the next set of I/Os started, kernel version only */ -void -rf_DiskIOComplete(queue, req, status) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int status; -{ - int done = 0; - - RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); - - /* unlock the queue: (1) after an unlocking req completes (2) after a - * locking req fails */ - if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) { - Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL)); - RF_UNLOCK_QUEUE(queue); - } - queue->numOutstanding--; - RF_ASSERT(queue->numOutstanding >= 0); - - /* dispatch requests to the disk until we find one that we can't. */ - /* no reason to continue once we've filled up the queue */ - /* no reason to even start if the queue is locked */ - - while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) { - if (queue->nextLockingOp) { - req = queue->nextLockingOp; - queue->nextLockingOp = NULL; - Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n", req->priority, queue->row, queue->col); - } else { - req = (queue->qPtr->Dequeue) (queue->qHdr); - if (req != NULL) { - Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n", req->priority, queue->row, queue->col); - } else { - Dprintf1("DiskIOComplete: no more requests to extract.\n", ""); - } - } - if (req) { - queue->queueLength--; /* decrement count of number - * of requests waiting in this - * queue */ - RF_ASSERT(queue->queueLength >= 0); - } - if (!req) - done = 1; - else - if (RF_LOCKING_REQ(req)) { - if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */ - Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n", req->priority, queue->row, queue->col); - RF_LOCK_QUEUE(queue); - rf_DispatchKernelIO(queue, req); - done = 1; - } else { /* put it aside to wait for - * the queue to drain */ - Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n", req->priority, queue->row, queue->col); - RF_ASSERT(queue->nextLockingOp == NULL); - queue->nextLockingOp = req; - done = 1; - } - } else - if (RF_UNLOCKING_REQ(req)) { /* should not happen: - * unlocking ops should - * not get queued */ - RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for - * the future */ - Dprintf3("DiskIOComplete: dispatching pri %d unl req to r %d c %d (SHOULD NOT SEE THIS)\n", req->priority, queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - done = 1; - } else - if (RF_OK_TO_DISPATCH(queue, req)) { - Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n", req->priority, queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - } else { /* we can't dispatch it, - * so just re-enqueue - * it. */ - /* potential trouble here if - * disk queues batch reqs */ - Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n", req->priority, queue->row, queue->col); - queue->queueLength++; - (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority); - done = 1; - } - } - - RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); -} -/* promotes accesses tagged with the given parityStripeID from low priority - * to normal priority. This promotion is optional, meaning that a queue - * need not implement it. If there is no promotion routine associated with - * a queue, this routine does nothing and returns -1. - */ -int -rf_DiskIOPromote(queue, parityStripeID, which_ru) - RF_DiskQueue_t *queue; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; -{ - int retval; - - if (!queue->qPtr->Promote) - return (-1); - RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); - retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru); - RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); - return (retval); -} - -RF_DiskQueueData_t * -rf_CreateDiskQueueData( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, - RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) -{ - RF_DiskQueueData_t *p; - - RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); - - p->sectorOffset = ssect + rf_protectedSectors; - p->numSector = nsect; - p->type = typ; - p->buf = buf; - p->parityStripeID = parityStripeID; - p->which_ru = which_ru; - p->CompleteFunc = wakeF; - p->argument = arg; - p->next = next; - p->tracerec = tracerec; - p->priority = RF_IO_NORMAL_PRIORITY; - p->AuxFunc = NULL; - p->buf2 = NULL; - p->raidPtr = raidPtr; - p->flags = flags; - p->b_proc = kb_proc; - return (p); -} - -RF_DiskQueueData_t * -rf_CreateDiskQueueDataFull( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, - RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - int priority, - int (*AuxFunc) (void *,...), - caddr_t buf2, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) -{ - RF_DiskQueueData_t *p; - - RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); - - p->sectorOffset = ssect + rf_protectedSectors; - p->numSector = nsect; - p->type = typ; - p->buf = buf; - p->parityStripeID = parityStripeID; - p->which_ru = which_ru; - p->CompleteFunc = wakeF; - p->argument = arg; - p->next = next; - p->tracerec = tracerec; - p->priority = priority; - p->AuxFunc = AuxFunc; - p->buf2 = buf2; - p->raidPtr = raidPtr; - p->flags = flags; - p->b_proc = kb_proc; - return (p); -} - -void -rf_FreeDiskQueueData(p) - RF_DiskQueueData_t *p; -{ - RF_FREELIST_FREE_CLEAN(rf_dqd_freelist, p, next, clean_dqd); -} diff --git a/sys/dev/raidframe/rf_diskqueue.h b/sys/dev/raidframe/rf_diskqueue.h deleted file mode 100644 index 7b162b0..0000000 --- a/sys/dev/raidframe/rf_diskqueue.h +++ /dev/null @@ -1,208 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_diskqueue.h,v 1.5 2000/02/13 04:53:57 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * rf_diskqueue.h -- header file for disk queues - * - * see comments in rf_diskqueue.c - * - ****************************************************************************************/ - - -#ifndef _RF__RF_DISKQUEUE_H_ -#define _RF__RF_DISKQUEUE_H_ - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_etimer.h> - -#include <dev/raidframe/rf_bsd.h> - -#define RF_IO_NORMAL_PRIORITY 1 -#define RF_IO_LOW_PRIORITY 0 - -/* the data held by a disk queue entry */ -struct RF_DiskQueueData_s { - RF_SectorNum_t sectorOffset; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors to read/write */ - RF_IoType_t type; /* read/write/nop */ - caddr_t buf; /* buffer pointer */ - RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this - * access is for */ - RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */ - int priority; /* the priority of this request */ - int (*CompleteFunc) (void *, int); /* function to be called upon - * completion */ - int (*AuxFunc) (void *,...); /* function called upon - * completion of the first I/O - * of a Read_Op_Write pair */ - void *argument; /* argument to be passed to CompleteFunc */ - RF_Raid_t *raidPtr; /* needed for simulation */ - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - RF_Etimer_t qtime; /* perf mon only - time request is in queue */ - long entryTime; - RF_DiskQueueData_t *next; - RF_DiskQueueData_t *prev; - caddr_t buf2; /* for read-op-write */ - dev_t dev; /* the device number for in-kernel version */ - RF_DiskQueue_t *queue; /* the disk queue to which this req is - * targeted */ - RF_DiskQueueDataFlags_t flags; /* flags controlling operation */ - - struct proc *b_proc; /* the b_proc from the original bp passed into - * the driver for this I/O */ - /* XXX Should this be changed to the opaque - * RF_Thread_t ? */ - RF_Buf_t bp; /* a bp to use to get this I/O done */ -}; -#define RF_LOCK_DISK_QUEUE 0x01 -#define RF_UNLOCK_DISK_QUEUE 0x02 - -/* note: "Create" returns type-specific queue header pointer cast to (void *) */ -struct RF_DiskQueueSW_s { - RF_DiskQueueType_t queueType; - void *(*Create) (RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine -- - * one call per queue in - * system */ - void (*Enqueue) (void *, RF_DiskQueueData_t *, int); /* enqueue routine */ - RF_DiskQueueData_t *(*Dequeue) (void *); /* dequeue routine */ - RF_DiskQueueData_t *(*Peek) (void *); /* peek at head of queue */ - - /* the rest are optional: they improve performance, but the driver - * will deal with it if they don't exist */ - int (*Promote) (void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of - * tagged accesses */ -}; - -struct RF_DiskQueue_s { - RF_DiskQueueSW_t *qPtr; /* access point to queue functions */ - void *qHdr; /* queue header, of whatever type */ - RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */ - RF_DECLARE_COND(cond) /* condition variable for - * synchronization */ - long numOutstanding; /* number of I/Os currently outstanding on - * disk */ - long maxOutstanding; /* max # of I/Os that can be outstanding on a - * disk (in-kernel only) */ - int curPriority; /* the priority of accs all that are currently - * outstanding */ - long queueLength; /* number of requests in queue */ - RF_DiskQueueData_t *nextLockingOp; /* a locking op that has - * arrived at the head of the - * queue & is waiting for - * drainage */ - RF_DiskQueueData_t *unlockingOp; /* used at user level to - * communicate unlocking op - * b/w user (or dag exec) & - * disk threads */ - int numWaiting; /* number of threads waiting on this variable. - * user-level only */ - RF_DiskQueueFlags_t flags; /* terminate, locked */ - RF_Raid_t *raidPtr; /* associated array */ - dev_t dev; /* device number for kernel version */ - RF_SectorNum_t last_deq_sector; /* last sector number dequeued or - * dispatched */ - int row, col; /* debug only */ - struct raidcinfo *rf_cinfo; /* disks component info.. */ -}; -#define RF_DQ_LOCKED 0x02 /* no new accs allowed until queue is - * explicitly unlocked */ - -/* macros setting & returning information about queues and requests */ -#define RF_QUEUE_LOCKED(_q) ((_q)->flags & RF_DQ_LOCKED) -#define RF_QUEUE_EMPTY(_q) (((_q)->numOutstanding == 0) && ((_q)->nextLockingOp == NULL) && !RF_QUEUE_LOCKED(_q)) -#define RF_QUEUE_FULL(_q) ((_q)->numOutstanding == (_q)->maxOutstanding) - -#define RF_LOCK_QUEUE(_q) (_q)->flags |= RF_DQ_LOCKED -#define RF_UNLOCK_QUEUE(_q) (_q)->flags &= ~RF_DQ_LOCKED - -#define RF_LOCK_QUEUE_MUTEX(_q_,_wh_) RF_LOCK_MUTEX((_q_)->mutex) -#define RF_UNLOCK_QUEUE_MUTEX(_q_,_wh_) RF_UNLOCK_MUTEX((_q_)->mutex) - -#define RF_LOCKING_REQ(_r) ((_r)->flags & RF_LOCK_DISK_QUEUE) -#define RF_UNLOCKING_REQ(_r) ((_r)->flags & RF_UNLOCK_DISK_QUEUE) - -/* whether it is ok to dispatch a regular request */ -#define RF_OK_TO_DISPATCH(_q_,_r_) \ - (RF_QUEUE_EMPTY(_q_) || \ - (!RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority))) - -int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t ** listp); - -void rf_TerminateDiskQueues(RF_Raid_t * raidPtr); - -int -rf_ConfigureDiskQueues(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); - -void rf_DiskIOEnqueue(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int pri); - - -void rf_DiskIOComplete(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int status); - -int -rf_DiskIOPromote(RF_DiskQueue_t * queue, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -RF_DiskQueueData_t * -rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, - RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - void *raidPtr, RF_DiskQueueDataFlags_t flags, - void *kb_proc); - -RF_DiskQueueData_t * -rf_CreateDiskQueueDataFull(RF_IoType_t typ, RF_SectorNum_t ssect, - RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - int priority, int (*AuxFunc) (void *,...), - caddr_t buf2, void *raidPtr, - RF_DiskQueueDataFlags_t flags, void *kb_proc); - -void -rf_FreeDiskQueueData(RF_DiskQueueData_t * p); - -int -rf_ConfigureDiskQueue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t, - RF_RowCol_t, RF_DiskQueueSW_t *, - RF_SectorCount_t, dev_t, int, - RF_ShutdownList_t **, - RF_AllocListElem_t *); -#endif /* !_RF__RF_DISKQUEUE_H_ */ diff --git a/sys/dev/raidframe/rf_disks.c b/sys/dev/raidframe/rf_disks.c deleted file mode 100644 index 14f72c2..0000000 --- a/sys/dev/raidframe/rf_disks.c +++ /dev/null @@ -1,1140 +0,0 @@ -/* $NetBSD: rf_disks.c,v 1.34 2000/12/05 01:35:56 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/*- - * Copyright (c) 1999 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************************************** - * rf_disks.c -- code to perform operations on the actual disks - ***************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_kintf.h> -#include <dev/raidframe/rf_bsd.h> - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - -static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); -static void rf_print_label_status( RF_Raid_t *, int, int, char *, - RF_ComponentLabel_t *); -static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, - RF_ComponentLabel_t *, int, int ); - -#define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) -#define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) - -/************************************************************************** - * - * initialize the disks comprising the array - * - * We want the spare disks to have regular row,col numbers so that we can - * easily substitue a spare for a failed disk. But, the driver code assumes - * throughout that the array contains numRow by numCol _non-spare_ disks, so - * it's not clear how to fit in the spares. This is an unfortunate holdover - * from raidSim. The quick and dirty fix is to make row zero bigger than the - * rest, and put all the spares in it. This probably needs to get changed - * eventually. - * - **************************************************************************/ - -int -rf_ConfigureDisks( listp, raidPtr, cfgPtr ) - RF_ShutdownList_t **listp; - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - RF_RaidDisk_t **disks; - RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; - RF_RowCol_t r, c; - int bs, ret; - unsigned i, count, foundone = 0, numFailuresThisRow; - int force; - - force = cfgPtr->force; - - ret = rf_AllocDiskStructures(raidPtr, cfgPtr); - if (ret) - goto fail; - - disks = raidPtr->Disks; - - for (r = 0; r < raidPtr->numRow; r++) { - numFailuresThisRow = 0; - for (c = 0; c < raidPtr->numCol; c++) { - ret = rf_ConfigureDisk(raidPtr, - &cfgPtr->devnames[r][c][0], - &disks[r][c], r, c); - - if (ret) - goto fail; - - if (disks[r][c].status == rf_ds_optimal) { - raidread_component_label( - raidPtr->raid_cinfo[r][c].ci_dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &raidPtr->raid_cinfo[r][c].ci_label); - } - - if (disks[r][c].status != rf_ds_optimal) { - numFailuresThisRow++; - } else { - if (disks[r][c].numBlocks < min_numblks) - min_numblks = disks[r][c].numBlocks; - DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", - r, c, disks[r][c].devname, - (long int) disks[r][c].numBlocks, - disks[r][c].blockSize, - (long int) disks[r][c].numBlocks * - disks[r][c].blockSize / 1024 / 1024); - } - } - /* XXX fix for n-fault tolerant */ - /* XXX this should probably check to see how many failures - we can handle for this configuration! */ - if (numFailuresThisRow > 0) - raidPtr->status[r] = rf_rs_degraded; - } - - /* all disks must be the same size & have the same block size, bs must - * be a power of 2 */ - bs = 0; - for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { - for (c = 0; !foundone && c < raidPtr->numCol; c++) { - if (disks[r][c].status == rf_ds_optimal) { - bs = disks[r][c].blockSize; - foundone = 1; - } - } - } - if (!foundone) { - RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); - ret = EINVAL; - goto fail; - } - for (count = 0, i = 1; i; i <<= 1) - if (bs & i) - count++; - if (count != 1) { - RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); - ret = EINVAL; - goto fail; - } - - if (rf_CheckLabels( raidPtr, cfgPtr )) { - printf("raid%d: There were fatal errors\n", raidPtr->raidid); - if (force != 0) { - printf("raid%d: Fatal errors being ignored.\n", - raidPtr->raidid); - } else { - ret = EINVAL; - goto fail; - } - } - - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - if (disks[r][c].status == rf_ds_optimal) { - if (disks[r][c].blockSize != bs) { - RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); - ret = EINVAL; - goto fail; - } - if (disks[r][c].numBlocks != min_numblks) { - RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", - r, c, (int) min_numblks); - disks[r][c].numBlocks = min_numblks; - } - } - } - } - - raidPtr->sectorsPerDisk = min_numblks; - raidPtr->logBytesPerSector = ffs(bs) - 1; - raidPtr->bytesPerSector = bs; - raidPtr->sectorMask = bs - 1; - return (0); - -fail: - - rf_UnconfigureVnodes( raidPtr ); - - return (ret); -} - - -/**************************************************************************** - * set up the data structures describing the spare disks in the array - * recall from the above comment that the spare disk descriptors are stored - * in row zero, which is specially expanded to hold them. - ****************************************************************************/ -int -rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) - RF_ShutdownList_t ** listp; - RF_Raid_t * raidPtr; - RF_Config_t * cfgPtr; -{ - int i, ret; - unsigned int bs; - RF_RaidDisk_t *disks; - int num_spares_done; - - num_spares_done = 0; - - /* The space for the spares should have already been allocated by - * ConfigureDisks() */ - - disks = &raidPtr->Disks[0][raidPtr->numCol]; - for (i = 0; i < raidPtr->numSpare; i++) { - ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], - &disks[i], 0, raidPtr->numCol + i); - if (ret) - goto fail; - if (disks[i].status != rf_ds_optimal) { - RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", - &cfgPtr->spare_names[i][0]); - } else { - disks[i].status = rf_ds_spare; /* change status to - * spare */ - DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, - disks[i].devname, - (long int) disks[i].numBlocks, disks[i].blockSize, - (long int) disks[i].numBlocks * - disks[i].blockSize / 1024 / 1024); - } - num_spares_done++; - } - - /* check sizes and block sizes on spare disks */ - bs = 1 << raidPtr->logBytesPerSector; - for (i = 0; i < raidPtr->numSpare; i++) { - if (disks[i].blockSize != bs) { - RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); - ret = EINVAL; - goto fail; - } - if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { - RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", - disks[i].devname, disks[i].blockSize, - (long int) raidPtr->sectorsPerDisk); - ret = EINVAL; - goto fail; - } else - if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { - RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); - - disks[i].numBlocks = raidPtr->sectorsPerDisk; - } - } - - return (0); - -fail: - - /* Release the hold on the main components. We've failed to allocate - * a spare, and since we're failing, we need to free things.. - - XXX failing to allocate a spare is *not* that big of a deal... - We *can* survive without it, if need be, esp. if we get hot - adding working. - - If we don't fail out here, then we need a way to remove this spare... - that should be easier to do here than if we are "live"... - - */ - - rf_UnconfigureVnodes( raidPtr ); - - return (ret); -} - -static int -rf_AllocDiskStructures(raidPtr, cfgPtr) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - RF_RaidDisk_t **disks; - int ret; - int r; - - RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), - (RF_RaidDisk_t **), raidPtr->cleanupList); - if (disks == NULL) { - ret = ENOMEM; - goto fail; - } - raidPtr->Disks = disks; - /* get space for the device-specific stuff... */ - RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, - sizeof(struct raidcinfo *), (struct raidcinfo **), - raidPtr->cleanupList); - if (raidPtr->raid_cinfo == NULL) { - ret = ENOMEM; - goto fail; - } - - for (r = 0; r < raidPtr->numRow; r++) { - /* We allocate RF_MAXSPARE on the first row so that we - have room to do hot-swapping of spares */ - RF_CallocAndAdd(disks[r], raidPtr->numCol - + ((r == 0) ? RF_MAXSPARE : 0), - sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), - raidPtr->cleanupList); - if (disks[r] == NULL) { - ret = ENOMEM; - goto fail; - } - /* get more space for device specific stuff.. */ - RF_CallocAndAdd(raidPtr->raid_cinfo[r], - raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), - sizeof(struct raidcinfo), (struct raidcinfo *), - raidPtr->cleanupList); - if (raidPtr->raid_cinfo[r] == NULL) { - ret = ENOMEM; - goto fail; - } - } - return(0); -fail: - rf_UnconfigureVnodes( raidPtr ); - - return(ret); -} - - -/* configure a single disk during auto-configuration at boot */ -int -rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; - RF_AutoConfig_t *auto_config; -{ - RF_RaidDisk_t **disks; - RF_RaidDisk_t *diskPtr; - RF_RowCol_t r, c; - RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; - int bs, ret; - int numFailuresThisRow; - int force; - RF_AutoConfig_t *ac; - int parity_good; - int mod_counter; - int mod_counter_found; - - rf_printf(0, "Starting autoconfiguration of RAID set...\n"); - force = cfgPtr->force; - - ret = rf_AllocDiskStructures(raidPtr, cfgPtr); - if (ret) - goto fail; - - disks = raidPtr->Disks; - - /* assume the parity will be fine.. */ - parity_good = RF_RAID_CLEAN; - - /* Check for mod_counters that are too low */ - mod_counter_found = 0; - mod_counter = 0; - ac = auto_config; - while(ac!=NULL) { - if (mod_counter_found==0) { - mod_counter = ac->clabel->mod_counter; - mod_counter_found = 1; - } else { - if (ac->clabel->mod_counter > mod_counter) { - mod_counter = ac->clabel->mod_counter; - } - } - ac->flag = 0; /* clear the general purpose flag */ - ac = ac->next; - } - - bs = 0; - for (r = 0; r < raidPtr->numRow; r++) { - numFailuresThisRow = 0; - for (c = 0; c < raidPtr->numCol; c++) { - diskPtr = &disks[r][c]; - - /* find this row/col in the autoconfig */ - rf_printf(1, "Looking for %d,%d in autoconfig\n",r,c); - ac = auto_config; - while(ac!=NULL) { - if (ac->clabel==NULL) { - /* big-time bad news. */ - goto fail; - } - if ((ac->clabel->row == r) && - (ac->clabel->column == c) && - (ac->clabel->mod_counter == mod_counter)) { - /* it's this one... */ - /* flag it as 'used', so we don't - free it later. */ - ac->flag = 1; - rf_printf(1, "Found: %s at %d,%d\n", - ac->devname, r, c); - break; - } - ac=ac->next; - } - - if (ac==NULL) { - /* we didn't find an exact match with a - correct mod_counter above... can we - find one with an incorrect mod_counter - to use instead? (this one, if we find - it, will be marked as failed once the - set configures) - */ - - ac = auto_config; - while(ac!=NULL) { - if (ac->clabel==NULL) { - /* big-time bad news. */ - goto fail; - } - if ((ac->clabel->row == r) && - (ac->clabel->column == c)) { - /* it's this one... - flag it as 'used', so we - don't free it later. */ - ac->flag = 1; - rf_printf(1, "Found(low mod_counter): %s at %d,%d\n", - ac->devname,r,c); - - break; - } - ac=ac->next; - } - } - - - - if (ac!=NULL) { - /* Found it. Configure it.. */ - diskPtr->blockSize = ac->clabel->blockSize; - diskPtr->numBlocks = ac->clabel->numBlocks; - /* Note: rf_protectedSectors is already - factored into numBlocks here */ - raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; - raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; - - memcpy(&raidPtr->raid_cinfo[r][c].ci_label, - ac->clabel, sizeof(*ac->clabel)); - sprintf(diskPtr->devname, "/dev/%s", - ac->devname); - - /* note the fact that this component was - autoconfigured. You'll need this info - later. Trust me :) */ - diskPtr->auto_configured = 1; - diskPtr->dev = ac->dev; - - /* - * we allow the user to specify that - * only a fraction of the disks should - * be used this is just for debug: it - * speeds up the parity scan - */ - - diskPtr->numBlocks = diskPtr->numBlocks * - rf_sizePercentage / 100; - - /* XXX these will get set multiple times, - but since we're autoconfiguring, they'd - better be always the same each time! - If not, this is the least of your worries */ - - bs = diskPtr->blockSize; - min_numblks = diskPtr->numBlocks; - - /* this gets done multiple times, but that's - fine -- the serial number will be the same - for all components, guaranteed */ - raidPtr->serial_number = - ac->clabel->serial_number; - /* check the last time the label - was modified */ - if (ac->clabel->mod_counter != - mod_counter) { - /* Even though we've filled in all - of the above, we don't trust - this component since it's - modification counter is not - in sync with the rest, and we really - consider it to be failed. */ - disks[r][c].status = rf_ds_failed; - numFailuresThisRow++; - } else { - if (ac->clabel->clean != - RF_RAID_CLEAN) { - parity_good = RF_RAID_DIRTY; - } - } - } else { - /* Didn't find it at all!! - Component must really be dead */ - disks[r][c].status = rf_ds_failed; - sprintf(disks[r][c].devname,"component%d", - r * raidPtr->numCol + c); - numFailuresThisRow++; - } - } - /* XXX fix for n-fault tolerant */ - /* XXX this should probably check to see how many failures - we can handle for this configuration! */ - if (numFailuresThisRow > 0) - raidPtr->status[r] = rf_rs_degraded; - } - - /* close the device for the ones that didn't get used */ - - ac = auto_config; - while(ac!=NULL) { - if (ac->flag == 0) { -#if defined(__NetBSD__) - vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); -#elif defined(__FreeBSD__) - vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY, - raidPtr->engine_thread); -#endif - VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0); - vput(ac->vp); - ac->vp = NULL; - rf_printf(1, "Released %s from auto-config set.\n", - ac->devname); - } - ac = ac->next; - } - - raidPtr->mod_counter = mod_counter; - - /* note the state of the parity, if any */ - raidPtr->parity_good = parity_good; - raidPtr->sectorsPerDisk = min_numblks; - raidPtr->logBytesPerSector = ffs(bs) - 1; - raidPtr->bytesPerSector = bs; - raidPtr->sectorMask = bs - 1; - return (0); - -fail: - - rf_UnconfigureVnodes( raidPtr ); - - return (ret); - -} - -/* configure a single disk in the array */ -int -rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) - RF_Raid_t *raidPtr; - char *buf; - RF_RaidDisk_t *diskPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - char *p; - int retcode; - - int error; - - retcode = 0; - p = rf_find_non_white(buf); - if (p[strlen(p) - 1] == '\n') { - /* strip off the newline */ - p[strlen(p) - 1] = '\0'; - } - (void) strcpy(diskPtr->devname, p); - - /* Let's start by claiming the component is fine and well... */ - diskPtr->status = rf_ds_optimal; - - raidPtr->raid_cinfo[row][col].ci_vp = NULL; - raidPtr->raid_cinfo[row][col].ci_dev = NULL; - - error = raid_getcomponentsize(raidPtr, row, col); - if (error) { - printf("raidlookup on device: %s failed!\n", diskPtr->devname); - if (error == ENXIO) { - /* the component isn't there... must be dead :-( */ - diskPtr->status = rf_ds_failed; - return (error); - } - } - return (0); -} - -static void -rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) - RF_Raid_t *raidPtr; - int row; - int column; - char *dev_name; - RF_ComponentLabel_t *ci_label; -{ - - printf("raid%d: Component %s being configured at row: %d col: %d\n", - raidPtr->raidid, dev_name, row, column ); - printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", - ci_label->row, ci_label->column, - ci_label->num_rows, ci_label->num_columns); - printf(" Version: %d Serial Number: %d Mod Counter: %d\n", - ci_label->version, ci_label->serial_number, - ci_label->mod_counter); - printf(" Clean: %s Status: %d\n", - ci_label->clean ? "Yes" : "No", ci_label->status ); -} - -static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, - serial_number, mod_counter ) - RF_Raid_t *raidPtr; - int row; - int column; - char *dev_name; - RF_ComponentLabel_t *ci_label; - int serial_number; - int mod_counter; -{ - int fatal_error = 0; - - if (serial_number != ci_label->serial_number) { - printf("%s has a different serial number: %d %d\n", - dev_name, serial_number, ci_label->serial_number); - fatal_error = 1; - } - if (mod_counter != ci_label->mod_counter) { - printf("%s has a different modfication count: %d %d\n", - dev_name, mod_counter, ci_label->mod_counter); - } - - if (row != ci_label->row) { - printf("Row out of alignment for: %s\n", dev_name); - fatal_error = 1; - } - if (column != ci_label->column) { - printf("Column out of alignment for: %s\n", dev_name); - fatal_error = 1; - } - if (raidPtr->numRow != ci_label->num_rows) { - printf("Number of rows do not match for: %s\n", dev_name); - fatal_error = 1; - } - if (raidPtr->numCol != ci_label->num_columns) { - printf("Number of columns do not match for: %s\n", dev_name); - fatal_error = 1; - } - if (ci_label->clean == 0) { - /* it's not clean, but that's not fatal */ - printf("%s is not clean!\n", dev_name); - } - return(fatal_error); -} - - -/* - - rf_CheckLabels() - check all the component labels for consistency. - Return an error if there is anything major amiss. - - */ - -int -rf_CheckLabels( raidPtr, cfgPtr ) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - int r,c; - char *dev_name; - RF_ComponentLabel_t *ci_label; - int serial_number = 0; - int mod_number = 0; - int fatal_error = 0; - int mod_values[4]; - int mod_count[4]; - int ser_values[4]; - int ser_count[4]; - int num_ser; - int num_mod; - int i; - int found; - int hosed_row; - int hosed_column; - int too_fatal; - int parity_good; - int force; - - hosed_row = -1; - hosed_column = -1; - too_fatal = 0; - force = cfgPtr->force; - - /* - We're going to try to be a little intelligent here. If one - component's label is bogus, and we can identify that it's the - *only* one that's gone, we'll mark it as "failed" and allow - the configuration to proceed. This will be the *only* case - that we'll proceed if there would be (otherwise) fatal errors. - - Basically we simply keep a count of how many components had - what serial number. If all but one agree, we simply mark - the disagreeing component as being failed, and allow - things to come up "normally". - - We do this first for serial numbers, and then for "mod_counter". - - */ - - num_ser = 0; - num_mod = 0; - for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - found=0; - for(i=0;i<num_ser;i++) { - if (ser_values[i] == ci_label->serial_number) { - ser_count[i]++; - found=1; - break; - } - } - if (!found) { - ser_values[num_ser] = ci_label->serial_number; - ser_count[num_ser] = 1; - num_ser++; - if (num_ser>2) { - fatal_error = 1; - break; - } - } - found=0; - for(i=0;i<num_mod;i++) { - if (mod_values[i] == ci_label->mod_counter) { - mod_count[i]++; - found=1; - break; - } - } - if (!found) { - mod_values[num_mod] = ci_label->mod_counter; - mod_count[num_mod] = 1; - num_mod++; - if (num_mod>2) { - fatal_error = 1; - break; - } - } - } - } - rf_printf(1, "raid%d: Summary of serial numbers:\n", raidPtr->raidid); - for(i=0;i<num_ser;i++) { - rf_printf(1, "%d %d\n", ser_values[i], ser_count[i]); - } - rf_printf(1, "raid%d: Summary of mod counters:\n", raidPtr->raidid); - for(i=0;i<num_mod;i++) { - rf_printf(1, "%d %d\n", mod_values[i], mod_count[i]); - } - serial_number = ser_values[0]; - if (num_ser == 2) { - if ((ser_count[0] == 1) || (ser_count[1] == 1)) { - /* Locate the maverick component */ - if (ser_count[1] > ser_count[0]) { - serial_number = ser_values[1]; - } - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - if (serial_number != - ci_label->serial_number) { - hosed_row = r; - hosed_column = c; - break; - } - } - } - printf("Hosed component: %s\n", - &cfgPtr->devnames[hosed_row][hosed_column][0]); - if (!force) { - /* we'll fail this component, as if there are - other major errors, we arn't forcing things - and we'll abort the config anyways */ - raidPtr->Disks[hosed_row][hosed_column].status - = rf_ds_failed; - raidPtr->numFailures++; - raidPtr->status[hosed_row] = rf_rs_degraded; - } - } else { - too_fatal = 1; - } - if (cfgPtr->parityConfig == '0') { - /* We've identified two different serial numbers. - RAID 0 can't cope with that, so we'll punt */ - too_fatal = 1; - } - - } - - /* record the serial number for later. If we bail later, setting - this doesn't matter, otherwise we've got the best guess at the - correct serial number */ - raidPtr->serial_number = serial_number; - - mod_number = mod_values[0]; - if (num_mod == 2) { - if ((mod_count[0] == 1) || (mod_count[1] == 1)) { - /* Locate the maverick component */ - if (mod_count[1] > mod_count[0]) { - mod_number = mod_values[1]; - } else if (mod_count[1] < mod_count[0]) { - mod_number = mod_values[0]; - } else { - /* counts of different modification values - are the same. Assume greater value is - the correct one, all other things - considered */ - if (mod_values[0] > mod_values[1]) { - mod_number = mod_values[0]; - } else { - mod_number = mod_values[1]; - } - - } - for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - if (mod_number != - ci_label->mod_counter) { - if ( ( hosed_row == r ) && - ( hosed_column == c )) { - /* same one. Can - deal with it. */ - } else { - hosed_row = r; - hosed_column = c; - if (num_ser != 1) { - too_fatal = 1; - break; - } - } - } - } - } - printf("Hosed component: %s\n", - &cfgPtr->devnames[hosed_row][hosed_column][0]); - if (!force) { - /* we'll fail this component, as if there are - other major errors, we arn't forcing things - and we'll abort the config anyways */ - if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { - raidPtr->Disks[hosed_row][hosed_column].status - = rf_ds_failed; - raidPtr->numFailures++; - raidPtr->status[hosed_row] = rf_rs_degraded; - } - } - } else { - too_fatal = 1; - } - if (cfgPtr->parityConfig == '0') { - /* We've identified two different mod counters. - RAID 0 can't cope with that, so we'll punt */ - too_fatal = 1; - } - } - - raidPtr->mod_counter = mod_number; - - if (too_fatal) { - /* we've had both a serial number mismatch, and a mod_counter - mismatch -- and they involved two different components!! - Bail -- make things fail so that the user must force - the issue... */ - hosed_row = -1; - hosed_column = -1; - } - - if (num_ser > 2) { - printf("raid%d: Too many different serial numbers!\n", - raidPtr->raidid); - } - - if (num_mod > 2) { - printf("raid%d: Too many different mod counters!\n", - raidPtr->raidid); - } - - /* we start by assuming the parity will be good, and flee from - that notion at the slightest sign of trouble */ - - parity_good = RF_RAID_CLEAN; - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - dev_name = &cfgPtr->devnames[r][c][0]; - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - - if ((r == hosed_row) && (c == hosed_column)) { - printf("raid%d: Ignoring %s\n", - raidPtr->raidid, dev_name); - } else { - rf_print_label_status( raidPtr, r, c, - dev_name, ci_label ); - if (rf_check_label_vitals( raidPtr, r, c, - dev_name, ci_label, - serial_number, - mod_number )) { - fatal_error = 1; - } - if (ci_label->clean != RF_RAID_CLEAN) { - parity_good = RF_RAID_DIRTY; - } - } - } - } - if (fatal_error) { - parity_good = RF_RAID_DIRTY; - } - - /* we note the state of the parity */ - raidPtr->parity_good = parity_good; - - return(fatal_error); -} - -int -rf_add_hot_spare(raidPtr, sparePtr) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *sparePtr; -{ - RF_RaidDisk_t *disks; - RF_DiskQueue_t *spareQueues; - int ret; - unsigned int bs; - int spare_number; - -#if 0 - printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); - printf("Num col: %d\n",raidPtr->numCol); -#endif - if (raidPtr->numSpare >= RF_MAXSPARE) { - RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); - return(EINVAL); - } - - RF_LOCK_MUTEX(raidPtr->mutex); - - /* the beginning of the spares... */ - disks = &raidPtr->Disks[0][raidPtr->numCol]; - - spare_number = raidPtr->numSpare; - - ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, - &disks[spare_number], 0, - raidPtr->numCol + spare_number); - - if (ret) - goto fail; - if (disks[spare_number].status != rf_ds_optimal) { - RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", - sparePtr->component_name); - ret=EINVAL; - goto fail; - } else { - disks[spare_number].status = rf_ds_spare; - DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, - disks[spare_number].devname, - (long int) disks[spare_number].numBlocks, - disks[spare_number].blockSize, - (long int) disks[spare_number].numBlocks * - disks[spare_number].blockSize / 1024 / 1024); - } - - - /* check sizes and block sizes on the spare disk */ - bs = 1 << raidPtr->logBytesPerSector; - if (disks[spare_number].blockSize != bs) { - RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); - ret = EINVAL; - goto fail; - } - if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { - RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", - disks[spare_number].devname, - disks[spare_number].blockSize, - (long int) raidPtr->sectorsPerDisk); - ret = EINVAL; - goto fail; - } else { - if (disks[spare_number].numBlocks > - raidPtr->sectorsPerDisk) { - RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, - (long int) raidPtr->sectorsPerDisk); - - disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; - } - } - - spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; - ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], - 0, raidPtr->numCol + spare_number, - raidPtr->qType, - raidPtr->sectorsPerDisk, - raidPtr->Disks[0][raidPtr->numCol + - spare_number].dev, - raidPtr->maxOutstanding, - &raidPtr->shutdownList, - raidPtr->cleanupList); - - - raidPtr->numSpare++; - RF_UNLOCK_MUTEX(raidPtr->mutex); - return (0); - -fail: - RF_UNLOCK_MUTEX(raidPtr->mutex); - return(ret); -} - -int -rf_remove_hot_spare(raidPtr,sparePtr) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *sparePtr; -{ - int spare_number; - - - if (raidPtr->numSpare==0) { - printf("No spares to remove!\n"); - return(EINVAL); - } - - spare_number = sparePtr->column; - - return(EINVAL); /* XXX not implemented yet */ -#if 0 - if (spare_number < 0 || spare_number > raidPtr->numSpare) { - return(EINVAL); - } - - /* verify that this spare isn't in use... */ - - - - - /* it's gone.. */ - - raidPtr->numSpare--; - - return(0); -#endif -} - - -int -rf_delete_component(raidPtr,component) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *component; -{ - RF_RaidDisk_t *disks; - - if ((component->row < 0) || - (component->row >= raidPtr->numRow) || - (component->column < 0) || - (component->column >= raidPtr->numCol)) { - return(EINVAL); - } - - disks = &raidPtr->Disks[component->row][component->column]; - - /* 1. This component must be marked as 'failed' */ - - return(EINVAL); /* Not implemented yet. */ -} - -int -rf_incorporate_hot_spare(raidPtr,component) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *component; -{ - - /* Issues here include how to 'move' this in if there is IO - taking place (e.g. component queues and such) */ - - return(EINVAL); /* Not implemented yet. */ -} diff --git a/sys/dev/raidframe/rf_disks.h b/sys/dev/raidframe/rf_disks.h deleted file mode 100644 index b57c4f8..0000000 --- a/sys/dev/raidframe/rf_disks.h +++ /dev/null @@ -1,108 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_disks.h,v 1.8 2000/03/27 03:25:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_disks.h -- header file for code related to physical disks - */ - -#ifndef _RF__RF_DISKS_H_ -#define _RF__RF_DISKS_H_ - -#include <sys/types.h> - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_bsd.h> - -/* - * A physical disk can be in one of several states: - * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW. - */ -enum RF_DiskStatus_e { - rf_ds_optimal, /* no problems */ - rf_ds_failed, /* reconstruction ongoing */ - rf_ds_reconstructing, /* reconstruction complete to spare, dead disk - * not yet replaced */ - rf_ds_dist_spared, /* reconstruction complete to distributed - * spare space, dead disk not yet replaced */ - rf_ds_spared, /* reconstruction complete to distributed - * spare space, dead disk not yet replaced */ - rf_ds_spare, /* an available spare disk */ - rf_ds_used_spare /* a spare which has been used, and hence is - * not available */ -}; -typedef enum RF_DiskStatus_e RF_DiskStatus_t; - -struct RF_RaidDisk_s { - char devname[56]; /* name of device file */ - RF_DiskStatus_t status; /* whether it is up or down */ - RF_RowCol_t spareRow; /* if in status "spared", this identifies the - * spare disk */ - RF_RowCol_t spareCol; /* if in status "spared", this identifies the - * spare disk */ - RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ - * CAPACITY */ - int blockSize; - RF_SectorCount_t partitionSize; /* The *actual* and *full* size of - the partition, from the disklabel */ - int auto_configured;/* 1 if this component was autoconfigured. - 0 otherwise. */ - dev_t dev; -}; -/* - * An RF_DiskOp_t ptr is really a pointer to a UAGT_CCB, but I want - * to isolate the cam layer from all other layers, so I typecast to/from - * RF_DiskOp_t * (i.e. void *) at the interfaces. - */ -typedef void RF_DiskOp_t; - -/* if a disk is in any of these states, it is inaccessible */ -#define RF_DEAD_DISK(_dstat_) (((_dstat_) == rf_ds_spared) || \ - ((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \ - ((_dstat_) == rf_ds_dist_spared)) - -#ifdef _KERNEL -#include <dev/raidframe/rf_bsd.h> - -int rf_ConfigureDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_ConfigureDisk(RF_Raid_t * raidPtr, char *buf, RF_RaidDisk_t * diskPtr, - RF_RowCol_t row, RF_RowCol_t col); -int rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, - RF_AutoConfig_t *auto_config); -int rf_CheckLabels( RF_Raid_t *, RF_Config_t *); -int rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component); -int rf_incorporate_hot_spare(RF_Raid_t *raidPtr, - RF_SingleComponent_t *component); -#endif /* _KERNEL */ -#endif /* !_RF__RF_DISKS_H_ */ diff --git a/sys/dev/raidframe/rf_driver.c b/sys/dev/raidframe/rf_driver.c deleted file mode 100644 index 9534132..0000000 --- a/sys/dev/raidframe/rf_driver.c +++ /dev/null @@ -1,1050 +0,0 @@ -/* $NetBSD: rf_driver.c,v 1.39 2000/12/15 02:12:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/*- - * Copyright (c) 1999 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II, - * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * - * rf_driver.c -- main setup, teardown, and access routines for the RAID driver - * - * all routines are prefixed with rf_ (raidframe), to avoid conficts. - * - ******************************************************************************/ - - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#include <sys/filio.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_threadstuff.h> - -#include <sys/errno.h> - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_aselect.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_states.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_callback.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_nwayxor.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_copyback.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif - -#include <sys/buf.h> - -/* rad == RF_RaidAccessDesc_t */ -static RF_FreeList_t *rf_rad_freelist; -#define RF_MAX_FREE_RAD 128 -#define RF_RAD_INC 16 -#define RF_RAD_INITIAL 32 - -/* debug variables */ -char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ - -/* main configuration routines */ -static int raidframe_booted = 0; - -static void rf_ConfigureDebug(RF_Config_t * cfgPtr); -static void set_debug_option(char *name, long val); -static void rf_UnconfigureArray(void); -static int init_rad(RF_RaidAccessDesc_t *); -static void clean_rad(RF_RaidAccessDesc_t *); -static void rf_ShutdownRDFreeList(void *); -static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); - -RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved - * printfs by different stripes */ - -#define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended)) -#define WAIT_FOR_QUIESCENCE(_raid_) \ - RF_LTSLEEP(&((_raid_)->accesses_suspended), PRIBIO, \ - "raidframe quiesce", 0, &((_raid_)->access_suspend_mutex)) - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -#define IO_BUF_ERR(bp, err) { \ - bp->bio_flags |= BIO_ERROR; \ - bp->bio_resid = bp->bio_bcount; \ - bp->bio_error = err; \ - biodone(bp); \ -}; -#else -#define IO_BUF_ERR(bp, err) { \ - bp->b_flags |= B_ERROR; \ - bp->b_resid = bp->b_bcount; \ - bp->b_error = err; \ - biodone(bp); \ -} -#endif - -static int configureCount = 0; /* number of active configurations */ -static int configInProgress = 0; /* configuration is in progress and code - * needs to be serialized. */ -static int isconfigged = 0; /* is basic raidframe (non per-array) - * stuff configged */ -RF_DECLARE_STATIC_MUTEX(configureMutex) /* used to lock the configuration - * stuff */ -static RF_ShutdownList_t *globalShutdown; /* non array-specific - * stuff */ - -/* called at system boot time */ -int -rf_BootRaidframe() -{ - int rc; - - if (raidframe_booted) - return (EBUSY); - raidframe_booted = 1; - - rc = rf_mutex_init(&configureMutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_PANIC(); - } - configureCount = 0; - isconfigged = 0; - globalShutdown = NULL; - return (0); -} -/* - * This function is really just for debugging user-level stuff: it - * frees up all memory, other RAIDframe resources which might otherwise - * be kept around. This is used with systems like "sentinel" to detect - * memory leaks. - */ -int -rf_UnbootRaidframe() -{ - int rc; - - RF_LOCK_MUTEX(configureMutex); - if (configureCount) { - RF_UNLOCK_MUTEX(configureMutex); - return (EBUSY); - } - raidframe_booted = 0; - RF_UNLOCK_MUTEX(configureMutex); - rc = rf_mutex_destroy(&configureMutex); - if (rc) { - RF_ERRORMSG3("Unable to destroy mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_PANIC(); - } - return (0); -} -/* - * Called whenever an array is shutdown - */ -static void -rf_UnconfigureArray() -{ - int rc; - - RF_LOCK_MUTEX(configureMutex); - if (--configureCount == 0) { /* if no active configurations, shut - * everything down */ - isconfigged = 0; - - rc = rf_ShutdownList(&globalShutdown); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc); - } - - /* - * We must wait until now, because the AllocList module - * uses the DebugMem module. - */ - if (rf_memDebug) - rf_print_unfreed(); - } - RF_UNLOCK_MUTEX(configureMutex); -} - -/* - * Called to shut down an array. - */ -int -rf_Shutdown(raidPtr) - RF_Raid_t *raidPtr; -{ - - if (!raidPtr->valid) { - RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); - return (EINVAL); - } - /* - * wait for outstanding IOs to land - * As described in rf_raid.h, we use the rad_freelist lock - * to protect the per-array info about outstanding descs - * since we need to do freelist locking anyway, and this - * cuts down on the amount of serialization we've got going - * on. - */ - RF_FREELIST_DO_LOCK(rf_rad_freelist); - if (raidPtr->waitShutdown) { - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - return (EBUSY); - } - raidPtr->waitShutdown = 1; - while (raidPtr->nAccOutstanding) { - RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist)); - } - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - - /* Wait for any parity re-writes to stop... */ - while (raidPtr->parity_rewrite_in_progress) { - printf("Waiting for parity re-write to exit...\n"); - tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO, - "rfprwshutdown", 0); - } - - raidPtr->valid = 0; - - rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE); - - rf_UnconfigureVnodes(raidPtr); - - rf_ShutdownList(&raidPtr->shutdownList); - - rf_UnconfigureArray(); - - return (0); -} - - -#define DO_INIT_CONFIGURE(f) { \ - rc = f (&globalShutdown); \ - if (rc) { \ - RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ - rf_ShutdownList(&globalShutdown); \ - RF_LOCK_MUTEX(configureMutex); \ - configInProgress = 0; \ - configureCount--; \ - RF_UNLOCK_MUTEX(configureMutex); \ - return(rc); \ - } \ -} - -#define DO_RAID_FAIL() { \ - rf_UnconfigureVnodes(raidPtr); \ - rf_ShutdownList(&raidPtr->shutdownList); \ - rf_UnconfigureArray(); \ -} - -#define DO_RAID_INIT_CONFIGURE(f) { \ - rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \ - if (rc) { \ - RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -#define DO_RAID_MUTEX(_m_) { \ - rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_)); \ - if (rc) { \ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", \ - __FILE__, __LINE__, rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -#define DO_RAID_COND(_c_) { \ - rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_)); \ - if (rc) { \ - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", \ - __FILE__, __LINE__, rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -int -rf_Configure(raidPtr, cfgPtr, ac) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; - RF_AutoConfig_t *ac; -{ - RF_RowCol_t row, col; - int i, rc; - - /* XXX This check can probably be removed now, since - RAIDFRAME_CONFIGURE now checks to make sure that the - RAID set is not already valid - */ - if (raidPtr->valid) { - RF_ERRORMSG("RAIDframe configuration not shut down. Aborting configure.\n"); - return (EINVAL); - } - RF_LOCK_MUTEX(configureMutex); - if (configInProgress == 1) { - RF_UNLOCK_MUTEX(configureMutex); - return (EBUSY); - } - configureCount++; - if (isconfigged == 0) { - configInProgress = 1; - RF_UNLOCK_MUTEX(configureMutex); - rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownList(&globalShutdown); - return (rc); - } - /* initialize globals */ - printf("RAIDFRAME: protectedSectors is %ld\n", - rf_protectedSectors); - - rf_clear_debug_print_buffer(); - - DO_INIT_CONFIGURE(rf_ConfigureAllocList); - - /* - * Yes, this does make debugging general to the whole - * system instead of being array specific. Bummer, drag. - */ - rf_ConfigureDebug(cfgPtr); - DO_INIT_CONFIGURE(rf_ConfigureDebugMem); - DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); - DO_INIT_CONFIGURE(rf_ConfigureMapModule); - DO_INIT_CONFIGURE(rf_ConfigureReconEvent); - DO_INIT_CONFIGURE(rf_ConfigureCallback); - DO_INIT_CONFIGURE(rf_ConfigureMemChunk); - DO_INIT_CONFIGURE(rf_ConfigureRDFreeList); - DO_INIT_CONFIGURE(rf_ConfigureNWayXor); - DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList); - DO_INIT_CONFIGURE(rf_ConfigureMCPair); - DO_INIT_CONFIGURE(rf_ConfigureDAGs); - DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs); - DO_INIT_CONFIGURE(rf_ConfigureDebugPrint); - DO_INIT_CONFIGURE(rf_ConfigureReconstruction); - DO_INIT_CONFIGURE(rf_ConfigureCopyback); - DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem); - - RF_LOCK_MUTEX(configureMutex); - isconfigged = 1; - configInProgress = 0; - } - RF_UNLOCK_MUTEX(configureMutex); - - DO_RAID_MUTEX(&raidPtr->mutex); - /* set up the cleanup list. Do this after ConfigureDebug so that - * value of memDebug will be set */ - - rf_MakeAllocList(raidPtr->cleanupList); - if (raidPtr->cleanupList == NULL) { - DO_RAID_FAIL(); - return (ENOMEM); - } - rc = rf_ShutdownCreate(&raidPtr->shutdownList, - (void (*) (void *)) rf_FreeAllocList, - raidPtr->cleanupList); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - DO_RAID_FAIL(); - return (rc); - } - raidPtr->numRow = cfgPtr->numRow; - raidPtr->numCol = cfgPtr->numCol; - raidPtr->numSpare = cfgPtr->numSpare; - - /* XXX we don't even pretend to support more than one row in the - * kernel... */ - if (raidPtr->numRow != 1) { - RF_ERRORMSG("Only one row supported in kernel.\n"); - DO_RAID_FAIL(); - return (EINVAL); - } - RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, sizeof(RF_RowStatus_t), - (RF_RowStatus_t *), raidPtr->cleanupList); - if (raidPtr->status == NULL) { - DO_RAID_FAIL(); - return (ENOMEM); - } - RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow, - sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList); - if (raidPtr->reconControl == NULL) { - DO_RAID_FAIL(); - return (ENOMEM); - } - for (i = 0; i < raidPtr->numRow; i++) { - raidPtr->status[i] = rf_rs_optimal; - raidPtr->reconControl[i] = NULL; - } - - DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine); - DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks); - - DO_RAID_COND(&raidPtr->outstandingCond); - - raidPtr->nAccOutstanding = 0; - raidPtr->waitShutdown = 0; - - DO_RAID_MUTEX(&raidPtr->access_suspend_mutex); - DO_RAID_COND(&raidPtr->quiescent_cond); - - DO_RAID_COND(&raidPtr->waitForReconCond); - - DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex); - - if (ac!=NULL) { - /* We have an AutoConfig structure.. Don't do the - normal disk configuration... call the auto config - stuff */ - rf_AutoConfigureDisks(raidPtr, cfgPtr, ac); - } else { - DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); - DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); - } - /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev - * no. is set */ - DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); - - DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); - - DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus); - - for (row = 0; row < raidPtr->numRow; row++) { - for (col = 0; col < raidPtr->numCol; col++) { - /* - * XXX better distribution - */ - raidPtr->hist_diskreq[row][col] = 0; - } - } - - raidPtr->numNewFailures = 0; - raidPtr->copyback_in_progress = 0; - raidPtr->parity_rewrite_in_progress = 0; - raidPtr->recon_in_progress = 0; - raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs; - - /* autoconfigure and root_partition will actually get filled in - after the config is done */ - raidPtr->autoconfigure = 0; - raidPtr->root_partition = 0; - raidPtr->last_unit = raidPtr->raidid; - raidPtr->config_order = 0; - - if (rf_keepAccTotals) { - raidPtr->keep_acc_totals = 1; - } - rf_StartUserStats(raidPtr); - - raidPtr->valid = 1; - return (0); -} - -static int -init_rad(desc) - RF_RaidAccessDesc_t *desc; -{ - int rc; - - rc = rf_mutex_init(&desc->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rc = rf_cond_init(&desc->cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&desc->mutex); - return (rc); - } - return (0); -} - -static void -clean_rad(desc) - RF_RaidAccessDesc_t *desc; -{ - rf_mutex_destroy(&desc->mutex); - rf_cond_destroy(&desc->cond); -} - -static void -rf_ShutdownRDFreeList(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next, (RF_RaidAccessDesc_t *), clean_rad); -} - -static int -rf_ConfigureRDFreeList(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD, - RF_RAD_INC, sizeof(RF_RaidAccessDesc_t)); - if (rf_rad_freelist == NULL) { - return (ENOMEM); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownRDFreeList(NULL); - return (rc); - } - RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next, - (RF_RaidAccessDesc_t *), init_rad); - return (0); -} - -RF_RaidAccessDesc_t * -rf_AllocRaidAccDesc( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp, - RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - void (*cbF) (RF_Buf_t), - void *cbA, - RF_AccessState_t * states) -{ - RF_RaidAccessDesc_t *desc; - - RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next, (RF_RaidAccessDesc_t *), init_rad); - if (raidPtr->waitShutdown) { - /* - * Actually, we're shutting the array down. Free the desc - * and return NULL. - */ - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next, clean_rad); - return (NULL); - } - raidPtr->nAccOutstanding++; - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - - desc->raidPtr = (void *) raidPtr; - desc->type = type; - desc->raidAddress = raidAddress; - desc->numBlocks = numBlocks; - desc->bufPtr = bufPtr; - desc->bp = bp; - desc->paramDAG = paramDAG; - desc->paramASM = paramASM; - desc->flags = flags; - desc->states = states; - desc->state = 0; - - desc->status = 0; - bzero((char *) &desc->tracerec, sizeof(RF_AccTraceEntry_t)); - desc->callbackFunc = (void (*) (RF_CBParam_t)) cbF; /* XXX */ - desc->callbackArg = cbA; - desc->next = NULL; - desc->head = desc; - desc->numPending = 0; - desc->cleanupList = NULL; - rf_MakeAllocList(desc->cleanupList); - return (desc); -} - -void -rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr = desc->raidPtr; - - RF_ASSERT(desc); - - rf_FreeAllocList(desc->cleanupList); - RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, clean_rad); - raidPtr->nAccOutstanding--; - if (raidPtr->waitShutdown) { - RF_SIGNAL_COND(raidPtr->outstandingCond); - } - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); -} -/********************************************************************* - * Main routine for performing an access. - * Accesses are retried until a DAG can not be selected. This occurs - * when either the DAG library is incomplete or there are too many - * failures in a parity group. - ********************************************************************/ -int -rf_DoAccess( - RF_Raid_t * raidPtr, - RF_IoType_t type, - int async_flag, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp_in, - RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t ** paramDesc, - void (*cbF) (RF_Buf_t), - void *cbA) -/* -type should be read or write -async_flag should be RF_TRUE or RF_FALSE -bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel -*/ -{ - RF_RaidAccessDesc_t *desc; - caddr_t lbufPtr = bufPtr; - RF_Buf_t bp = (RF_Buf_t) bp_in; - - raidAddress += rf_raidSectorOffset; - - if (!raidPtr->valid) { - RF_ERRORMSG("RAIDframe driver not successfully configured. Rejecting access.\n"); - IO_BUF_ERR(bp, EINVAL); - return (EINVAL); - } - - if (rf_accessDebug) { - - printf("logBytes is: %d %d %d\n", raidPtr->raidid, - raidPtr->logBytesPerSector, - (int) rf_RaidAddressToByte(raidPtr, numBlocks)); - printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid, - (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress, - (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), - (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1), - (int) numBlocks, - (int) rf_RaidAddressToByte(raidPtr, numBlocks), - (long) bufPtr); - } - if (raidAddress + numBlocks > raidPtr->totalSectors) { - - printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu\n", - (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors); - - IO_BUF_ERR(bp, ENOSPC); - return (ENOSPC); - } - desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress, - numBlocks, lbufPtr, bp, paramDAG, paramASM, - flags, cbF, cbA, raidPtr->Layout.map->states); - - if (desc == NULL) { - return (ENOMEM); - } - RF_ETIMER_START(desc->tracerec.tot_timer); - - desc->async_flag = async_flag; - - rf_ContinueRaidAccess(desc); - - return (0); -} -/* force the array into reconfigured mode without doing reconstruction */ -int -rf_SetReconfiguredMode(raidPtr, row, col) - RF_Raid_t *raidPtr; - int row; - int col; -{ - if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - printf("Can't set reconfigured mode in dedicated-spare array\n"); - RF_PANIC(); - } - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures++; - raidPtr->Disks[row][col].status = rf_ds_dist_spared; - raidPtr->status[row] = rf_rs_reconfigured; - rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); - /* install spare table only if declustering + distributed sparing - * architecture. */ - if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) - rf_InstallSpareTable(raidPtr, row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - return (0); -} - -extern int fail_row, fail_col, fail_time; -extern int delayed_recon; - -int -rf_FailDisk( - RF_Raid_t * raidPtr, - int frow, - int fcol, - int initRecon) -{ - printf("raid%d: Failing disk r%d c%d\n", raidPtr->raidid, frow, fcol); - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures++; - raidPtr->Disks[frow][fcol].status = rf_ds_failed; - raidPtr->status[frow] = rf_rs_degraded; - rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (initRecon) - rf_ReconstructFailedDisk(raidPtr, frow, fcol); - return (0); -} -/* releases a thread that is waiting for the array to become quiesced. - * access_suspend_mutex should be locked upon calling this - */ -void -rf_SignalQuiescenceLock(raidPtr, reconDesc) - RF_Raid_t *raidPtr; - RF_RaidReconDesc_t *reconDesc; -{ - if (rf_quiesceDebug) { - printf("raid%d: Signalling quiescence lock\n", - raidPtr->raidid); - } - raidPtr->access_suspend_release = 1; - - if (raidPtr->waiting_for_quiescence) { - SIGNAL_QUIESCENT_COND(raidPtr); - } -} -/* suspends all new requests to the array. No effect on accesses that are in flight. */ -int -rf_SuspendNewRequestsAndWait(raidPtr) - RF_Raid_t *raidPtr; -{ - if (rf_quiesceDebug) - printf("Suspending new reqs\n"); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accesses_suspended++; - raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1; - - if (raidPtr->waiting_for_quiescence) { - raidPtr->access_suspend_release = 0; - while (!raidPtr->access_suspend_release) { - printf("Suspending: Waiting for Quiescence\n"); - WAIT_FOR_QUIESCENCE(raidPtr); - raidPtr->waiting_for_quiescence = 0; - } - } - printf("Quiescence reached..\n"); - - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - return (raidPtr->waiting_for_quiescence); -} -/* wake up everyone waiting for quiescence to be released */ -void -rf_ResumeNewRequests(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_CallbackDesc_t *t, *cb; - - if (rf_quiesceDebug) - printf("Resuming new reqs\n"); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accesses_suspended--; - if (raidPtr->accesses_suspended == 0) - cb = raidPtr->quiesce_wait_list; - else - cb = NULL; - raidPtr->quiesce_wait_list = NULL; - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - while (cb) { - t = cb; - cb = cb->next; - (t->callbackFunc) (t->callbackArg); - rf_FreeCallbackDesc(t); - } -} -/***************************************************************************************** - * - * debug routines - * - ****************************************************************************************/ - -static void -set_debug_option(name, val) - char *name; - long val; -{ - RF_DebugName_t *p; - - for (p = rf_debugNames; p->name; p++) { - if (!strcmp(p->name, name)) { - *(p->ptr) = val; - printf("[Set debug variable %s to %ld]\n", name, val); - return; - } - } - RF_ERRORMSG1("Unknown debug string \"%s\"\n", name); -} - - -/* would like to use sscanf here, but apparently not available in kernel */ -/*ARGSUSED*/ -static void -rf_ConfigureDebug(cfgPtr) - RF_Config_t *cfgPtr; -{ - char *val_p, *name_p, *white_p; - long val; - int i; - - rf_ResetDebugOptions(); - for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) { - name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); - white_p = rf_find_white(name_p); /* skip to start of 2nd - * word */ - val_p = rf_find_non_white(white_p); - if (*val_p == '0' && *(val_p + 1) == 'x') - val = rf_htoi(val_p + 2); - else - val = rf_atoi(val_p); - *white_p = '\0'; - set_debug_option(name_p, val); - } -} -/* performance monitoring stuff */ - -#define TIMEVAL_TO_US(t) (((long) t.tv_sec) * 1000000L + (long) t.tv_usec) - -#if !defined(_KERNEL) && !defined(SIMULATE) - -/* - * Throughput stats currently only used in user-level RAIDframe - */ - -static int -rf_InitThroughputStats( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - /* these used by user-level raidframe only */ - rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - raidPtr->throughputstats.sum_io_us = 0; - raidPtr->throughputstats.num_ios = 0; - raidPtr->throughputstats.num_out_ios = 0; - return (0); -} - -void -rf_StartThroughputStats(RF_Raid_t * raidPtr) -{ - RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); - raidPtr->throughputstats.num_ios++; - raidPtr->throughputstats.num_out_ios++; - if (raidPtr->throughputstats.num_out_ios == 1) - RF_GETTIME(raidPtr->throughputstats.start); - RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); -} - -static void -rf_StopThroughputStats(RF_Raid_t * raidPtr) -{ - struct timeval diff; - - RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); - raidPtr->throughputstats.num_out_ios--; - if (raidPtr->throughputstats.num_out_ios == 0) { - RF_GETTIME(raidPtr->throughputstats.stop); - RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff); - raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(diff); - } - RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); -} - -static void -rf_PrintThroughputStats(RF_Raid_t * raidPtr) -{ - RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0); - if (raidPtr->throughputstats.sum_io_us != 0) { - printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios - / (raidPtr->throughputstats.sum_io_us / 1000000.0)); - } -} -#endif /* !KERNEL && !SIMULATE */ - -void -rf_StartUserStats(RF_Raid_t * raidPtr) -{ - RF_GETTIME(raidPtr->userstats.start); - raidPtr->userstats.sum_io_us = 0; - raidPtr->userstats.num_ios = 0; - raidPtr->userstats.num_sect_moved = 0; -} - -void -rf_StopUserStats(RF_Raid_t * raidPtr) -{ - RF_GETTIME(raidPtr->userstats.stop); -} - -void -rf_UpdateUserStats(raidPtr, rt, numsect) - RF_Raid_t *raidPtr; - int rt; /* resp time in us */ - int numsect; /* number of sectors for this access */ -{ - raidPtr->userstats.sum_io_us += rt; - raidPtr->userstats.num_ios++; - raidPtr->userstats.num_sect_moved += numsect; -} - -void -rf_PrintUserStats(RF_Raid_t * raidPtr) -{ - long elapsed_us, mbs, mbs_frac; - struct timeval diff; - - RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, &diff); - elapsed_us = TIMEVAL_TO_US(diff); - - /* 2000 sectors per megabyte, 10000000 microseconds per second */ - if (elapsed_us) - mbs = (raidPtr->userstats.num_sect_moved / 2000) / (elapsed_us / 1000000); - else - mbs = 0; - - /* this computes only the first digit of the fractional mb/s moved */ - if (elapsed_us) { - mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / (elapsed_us / 1000000)) - - (mbs * 10); - } else { - mbs_frac = 0; - } - - printf("Number of I/Os: %ld\n", raidPtr->userstats.num_ios); - printf("Elapsed time (us): %ld\n", elapsed_us); - printf("User I/Os per second: %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000))); - printf("Average user response time: %ld us\n", RF_DB0_CHECK(raidPtr->userstats.sum_io_us, raidPtr->userstats.num_ios)); - printf("Total sectors moved: %ld\n", raidPtr->userstats.num_sect_moved); - printf("Average access size (sect): %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, raidPtr->userstats.num_ios)); - printf("Achieved data rate: %ld.%ld MB/sec\n", mbs, mbs_frac); -} - - -void -rf_print_panic_message(line,file) - int line; - char *file; -{ - sprintf(rf_panicbuf,"raidframe error at line %d file %s", - line, file); -} - -void -rf_print_assert_panic_message(line,file,condition) - int line; - char *file; - char *condition; -{ - sprintf(rf_panicbuf, - "raidframe error at line %d file %s (failed asserting %s)\n", - line, file, condition); -} diff --git a/sys/dev/raidframe/rf_driver.h b/sys/dev/raidframe/rf_driver.h deleted file mode 100644 index 8b156c5..0000000 --- a/sys/dev/raidframe/rf_driver.h +++ /dev/null @@ -1,79 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_driver.h,v 1.4 2000/02/13 04:53:57 oster Exp $ */ -/* - * rf_driver.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_DRIVER_H_ -#define _RF__RF_DRIVER_H_ - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_bsd.h> - -#if _KERNEL -RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex) -int rf_BootRaidframe(void); -int rf_UnbootRaidframe(void); -int rf_Shutdown(RF_Raid_t * raidPtr); -int rf_Configure(RF_Raid_t * raidPtr, RF_Config_t * cfgPtr, - RF_AutoConfig_t *ac); -RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp, RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - void (*cbF) (RF_Buf_t), - void *cbA, - RF_AccessState_t * states); -void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc); -int rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, - caddr_t bufPtr, void *bp_in, RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t ** paramDesc, - void (*cbF) (RF_Buf_t), void *cbA); -int rf_SetReconfiguredMode(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); -int rf_FailDisk(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol, - int initRecon); -void rf_SignalQuiescenceLock(RF_Raid_t * raidPtr, - RF_RaidReconDesc_t * reconDesc); -int rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr); -void rf_ResumeNewRequests(RF_Raid_t * raidPtr); -void rf_StartThroughputStats(RF_Raid_t * raidPtr); -void rf_StartUserStats(RF_Raid_t * raidPtr); -void rf_StopUserStats(RF_Raid_t * raidPtr); -void rf_UpdateUserStats(RF_Raid_t * raidPtr, int rt, int numsect); -void rf_PrintUserStats(RF_Raid_t * raidPtr); -#endif /* _KERNEL */ -#endif /* !_RF__RF_DRIVER_H_ */ diff --git a/sys/dev/raidframe/rf_engine.c b/sys/dev/raidframe/rf_engine.c deleted file mode 100644 index d49ec20..0000000 --- a/sys/dev/raidframe/rf_engine.c +++ /dev/null @@ -1,812 +0,0 @@ -/* $NetBSD: rf_engine.c,v 1.10 2000/08/20 16:51:03 thorpej Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II, Mark Holland, Rachad Youssef - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * * - * engine.c -- code for DAG execution engine * - * * - * Modified to work as follows (holland): * - * A user-thread calls into DispatchDAG, which fires off the nodes that * - * are direct successors to the header node. DispatchDAG then returns, * - * and the rest of the I/O continues asynchronously. As each node * - * completes, the node execution function calls FinishNode(). FinishNode * - * scans the list of successors to the node and increments the antecedent * - * counts. Each node that becomes enabled is placed on a central node * - * queue. A dedicated dag-execution thread grabs nodes off of this * - * queue and fires them. * - * * - * NULL nodes are never fired. * - * * - * Terminator nodes are never fired, but rather cause the callback * - * associated with the DAG to be invoked. * - * * - * If a node fails, the dag either rolls forward to the completion or * - * rolls back, undoing previously-completed nodes and fails atomically. * - * The direction of recovery is determined by the location of the failed * - * node in the graph. If the failure occured before the commit node in * - * the graph, backward recovery is used. Otherwise, forward recovery is * - * used. * - * * - ****************************************************************************/ - -#include <dev/raidframe/rf_threadstuff.h> - -#include <sys/errno.h> - -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_kintf.h> - -static void DAGExecutionThread(RF_ThreadArg_t arg); - -#define DO_INIT(_l_,_r_) { \ - int _rc; \ - _rc = rf_create_managed_mutex(_l_,&(_r_)->node_queue_mutex); \ - if (_rc) { \ - return(_rc); \ - } \ - _rc = rf_create_managed_cond(_l_,&(_r_)->node_queue_cond); \ - if (_rc) { \ - return(_rc); \ - } \ -} - -/* synchronization primitives for this file. DO_WAIT should be enclosed in a while loop. */ - -/* - * XXX Is this spl-ing really necessary? - */ -#define DO_LOCK(_r_) \ -do { \ - ks = splbio(); \ - RF_LOCK_MUTEX((_r_)->node_queue_mutex); \ -} while (0) - -#define DO_UNLOCK(_r_) \ -do { \ - RF_UNLOCK_MUTEX((_r_)->node_queue_mutex); \ - splx(ks); \ -} while (0) - -#define DO_WAIT(_r_) \ - RF_WAIT_COND((_r_)->node_queue, (_r_)->node_queue_mutex) - -#define DO_SIGNAL(_r_) \ - RF_BROADCAST_COND((_r_)->node_queue) /* XXX RF_SIGNAL_COND? */ - -static void rf_ShutdownEngine(void *); - -static void -rf_ShutdownEngine(arg) - void *arg; -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - raidPtr->shutdown_engine = 1; - DO_SIGNAL(raidPtr); -} - -int -rf_ConfigureEngine( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - DO_INIT(listp, raidPtr); - - raidPtr->node_queue = NULL; - raidPtr->dags_in_flight = 0; - - rc = rf_init_managed_threadgroup(listp, &raidPtr->engine_tg); - if (rc) - return (rc); - - /* we create the execution thread only once per system boot. no need - * to check return code b/c the kernel panics if it can't create the - * thread. */ - if (rf_engineDebug) { - printf("raid%d: Creating engine thread\n", raidPtr->raidid); - } - if (RF_CREATE_THREAD(raidPtr->engine_thread, DAGExecutionThread, raidPtr,"raid")) { - RF_ERRORMSG("RAIDFRAME: Unable to create engine thread\n"); - return (ENOMEM); - } - if (rf_engineDebug) { - printf("raid%d: Created engine thread\n", raidPtr->raidid); - } - RF_THREADGROUP_STARTED(&raidPtr->engine_tg); - /* XXX something is missing here... */ -#ifdef debug - printf("Skipping the WAIT_START!!\n"); -#endif -#if 1 - printf("Waiting for DAG engine to start\n"); - RF_THREADGROUP_WAIT_START(&raidPtr->engine_tg); -#endif - /* engine thread is now running and waiting for work */ - if (rf_engineDebug) { - printf("raid%d: Engine thread running and waiting for events\n", raidPtr->raidid); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownEngine, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownEngine(NULL); - } - return (rc); -} - -static int -BranchDone(RF_DagNode_t * node) -{ - int i; - - /* return true if forward execution is completed for a node and it's - * succedents */ - switch (node->status) { - case rf_wait: - /* should never be called in this state */ - RF_PANIC(); - break; - case rf_fired: - /* node is currently executing, so we're not done */ - return (RF_FALSE); - case rf_good: - for (i = 0; i < node->numSuccedents; i++) /* for each succedent */ - if (!BranchDone(node->succedents[i])) /* recursively check - * branch */ - return RF_FALSE; - return RF_TRUE; /* node and all succedent branches aren't in - * fired state */ - break; - case rf_bad: - /* succedents can't fire */ - return (RF_TRUE); - case rf_recover: - /* should never be called in this state */ - RF_PANIC(); - break; - case rf_undone: - case rf_panic: - /* XXX need to fix this case */ - /* for now, assume that we're done */ - return (RF_TRUE); - break; - default: - /* illegal node status */ - RF_PANIC(); - break; - } -} - -static int -NodeReady(RF_DagNode_t * node) -{ - int ready; - - switch (node->dagHdr->status) { - case rf_enable: - case rf_rollForward: - if ((node->status == rf_wait) && (node->numAntecedents == node->numAntDone)) - ready = RF_TRUE; - else - ready = RF_FALSE; - break; - case rf_rollBackward: - RF_ASSERT(node->numSuccDone <= node->numSuccedents); - RF_ASSERT(node->numSuccFired <= node->numSuccedents); - RF_ASSERT(node->numSuccFired <= node->numSuccDone); - if ((node->status == rf_good) && (node->numSuccDone == node->numSuccedents)) - ready = RF_TRUE; - else - ready = RF_FALSE; - break; - default: - printf("Execution engine found illegal DAG status in NodeReady\n"); - RF_PANIC(); - break; - } - - return (ready); -} - - - -/* user context and dag-exec-thread context: - * Fire a node. The node's status field determines which function, do or undo, - * to be fired. - * This routine assumes that the node's status field has alread been set to - * "fired" or "recover" to indicate the direction of execution. - */ -static void -FireNode(RF_DagNode_t * node) -{ - switch (node->status) { - case rf_fired: - /* fire the do function of a node */ - if (rf_engineDebug) { - printf("raid%d: Firing node 0x%lx (%s)\n", - node->dagHdr->raidPtr->raidid, - (unsigned long) node, node->name); - } - if (node->flags & RF_DAGNODE_FLAG_YIELD) { -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) - /* thread_block(); */ - /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - * /usr/include/vm/vm_extern.h */ -#else - thread_block(); -#endif - } - (*(node->doFunc)) (node); - break; - case rf_recover: - /* fire the undo function of a node */ - if (rf_engineDebug) { - printf("raid%d: Firing (undo) node 0x%lx (%s)\n", - node->dagHdr->raidPtr->raidid, - (unsigned long) node, node->name); - } - if (node->flags & RF_DAGNODE_FLAG_YIELD) -#if defined(__NetBSD__) || defined(__FreeBSD__) && defined(_KERNEL) - /* thread_block(); */ - /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - * /usr/include/vm/vm_extern.h */ -#else - thread_block(); -#endif - (*(node->undoFunc)) (node); - break; - default: - RF_PANIC(); - break; - } -} - - - -/* user context: - * Attempt to fire each node in a linear array. - * The entire list is fired atomically. - */ -static void -FireNodeArray( - int numNodes, - RF_DagNode_t ** nodeList) -{ - RF_DagStatus_t dstat; - RF_DagNode_t *node; - int i, j; - - /* first, mark all nodes which are ready to be fired */ - for (i = 0; i < numNodes; i++) { - node = nodeList[i]; - dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { - if ((dstat == rf_enable) || (dstat == rf_rollForward)) { - RF_ASSERT(node->status == rf_wait); - if (node->commitNode) - node->dagHdr->numCommits++; - node->status = rf_fired; - for (j = 0; j < node->numAntecedents; j++) - node->antecedents[j]->numSuccFired++; - } else { - RF_ASSERT(dstat == rf_rollBackward); - RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node - * per graph */ - node->status = rf_recover; - } - } - } - /* now, fire the nodes */ - for (i = 0; i < numNodes; i++) { - if ((nodeList[i]->status == rf_fired) || (nodeList[i]->status == rf_recover)) - FireNode(nodeList[i]); - } -} - - -/* user context: - * Attempt to fire each node in a linked list. - * The entire list is fired atomically. - */ -static void -FireNodeList(RF_DagNode_t * nodeList) -{ - RF_DagNode_t *node, *next; - RF_DagStatus_t dstat; - int j; - - if (nodeList) { - /* first, mark all nodes which are ready to be fired */ - for (node = nodeList; node; node = next) { - next = node->next; - dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { - if ((dstat == rf_enable) || (dstat == rf_rollForward)) { - RF_ASSERT(node->status == rf_wait); - if (node->commitNode) - node->dagHdr->numCommits++; - node->status = rf_fired; - for (j = 0; j < node->numAntecedents; j++) - node->antecedents[j]->numSuccFired++; - } else { - RF_ASSERT(dstat == rf_rollBackward); - RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node - * per graph */ - node->status = rf_recover; - } - } - } - /* now, fire the nodes */ - for (node = nodeList; node; node = next) { - next = node->next; - if ((node->status == rf_fired) || (node->status == rf_recover)) - FireNode(node); - } - } -} -/* interrupt context: - * for each succedent - * propagate required results from node to succedent - * increment succedent's numAntDone - * place newly-enable nodes on node queue for firing - * - * To save context switches, we don't place NIL nodes on the node queue, - * but rather just process them as if they had fired. Note that NIL nodes - * that are the direct successors of the header will actually get fired by - * DispatchDAG, which is fine because no context switches are involved. - * - * Important: when running at user level, this can be called by any - * disk thread, and so the increment and check of the antecedent count - * must be locked. I used the node queue mutex and locked down the - * entire function, but this is certainly overkill. - */ -static void -PropagateResults( - RF_DagNode_t * node, - int context) -{ - RF_DagNode_t *s, *a; - RF_Raid_t *raidPtr; - int i, ks; - RF_DagNode_t *finishlist = NULL; /* a list of NIL nodes to be - * finished */ - RF_DagNode_t *skiplist = NULL; /* list of nodes with failed truedata - * antecedents */ - RF_DagNode_t *firelist = NULL; /* a list of nodes to be fired */ - RF_DagNode_t *q = NULL, *qh = NULL, *next; - int j, skipNode; - - raidPtr = node->dagHdr->raidPtr; - - DO_LOCK(raidPtr); - - /* debug - validate fire counts */ - for (i = 0; i < node->numAntecedents; i++) { - a = *(node->antecedents + i); - RF_ASSERT(a->numSuccFired >= a->numSuccDone); - RF_ASSERT(a->numSuccFired <= a->numSuccedents); - a->numSuccDone++; - } - - switch (node->dagHdr->status) { - case rf_enable: - case rf_rollForward: - for (i = 0; i < node->numSuccedents; i++) { - s = *(node->succedents + i); - RF_ASSERT(s->status == rf_wait); - (s->numAntDone)++; - if (s->numAntDone == s->numAntecedents) { - /* look for NIL nodes */ - if (s->doFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process - * them */ - s->next = finishlist; - finishlist = s; - } else { - /* look to see if the node is to be - * skipped */ - skipNode = RF_FALSE; - for (j = 0; j < s->numAntecedents; j++) - if ((s->antType[j] == rf_trueData) && (s->antecedents[j]->status == rf_bad)) - skipNode = RF_TRUE; - if (skipNode) { - /* this node has one or more - * failed true data - * dependencies, so skip it */ - s->next = skiplist; - skiplist = s; - } else - /* add s to list of nodes (q) - * to execute */ - if (context != RF_INTR_CONTEXT) { - /* we only have to - * enqueue if we're at - * intr context */ - s->next = firelist; /* put node on a list to - * be fired after we - * unlock */ - firelist = s; - } else { /* enqueue the node for - * the dag exec thread - * to fire */ - RF_ASSERT(NodeReady(s)); - if (q) { - q->next = s; - q = s; - } else { - qh = q = s; - qh->next = NULL; - } - } - } - } - } - - if (q) { - /* xfer our local list of nodes to the node queue */ - q->next = raidPtr->node_queue; - raidPtr->node_queue = qh; - DO_SIGNAL(raidPtr); - } - DO_UNLOCK(raidPtr); - - for (; skiplist; skiplist = next) { - next = skiplist->next; - skiplist->status = rf_skipped; - for (i = 0; i < skiplist->numAntecedents; i++) { - skiplist->antecedents[i]->numSuccFired++; - } - if (skiplist->commitNode) { - skiplist->dagHdr->numCommits++; - } - rf_FinishNode(skiplist, context); - } - for (; finishlist; finishlist = next) { - /* NIL nodes: no need to fire them */ - next = finishlist->next; - finishlist->status = rf_good; - for (i = 0; i < finishlist->numAntecedents; i++) { - finishlist->antecedents[i]->numSuccFired++; - } - if (finishlist->commitNode) - finishlist->dagHdr->numCommits++; - /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the terminal node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. - */ - rf_FinishNode(finishlist, context); /* recursive call */ - } - /* fire all nodes in firelist */ - FireNodeList(firelist); - break; - - case rf_rollBackward: - for (i = 0; i < node->numAntecedents; i++) { - a = *(node->antecedents + i); - RF_ASSERT(a->status == rf_good); - RF_ASSERT(a->numSuccDone <= a->numSuccedents); - RF_ASSERT(a->numSuccDone <= a->numSuccFired); - - if (a->numSuccDone == a->numSuccFired) { - if (a->undoFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process - * them */ - a->next = finishlist; - finishlist = a; - } else { - if (context != RF_INTR_CONTEXT) { - /* we only have to enqueue if - * we're at intr context */ - a->next = firelist; /* put node on a list to - * be fired after we - * unlock */ - firelist = a; - } else { /* enqueue the node for - * the dag exec thread - * to fire */ - RF_ASSERT(NodeReady(a)); - if (q) { - q->next = a; - q = a; - } else { - qh = q = a; - qh->next = NULL; - } - } - } - } - } - if (q) { - /* xfer our local list of nodes to the node queue */ - q->next = raidPtr->node_queue; - raidPtr->node_queue = qh; - DO_SIGNAL(raidPtr); - } - DO_UNLOCK(raidPtr); - for (; finishlist; finishlist = next) { /* NIL nodes: no need to - * fire them */ - next = finishlist->next; - finishlist->status = rf_good; - /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the first node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. - */ - rf_FinishNode(finishlist, context); /* recursive call */ - } - /* fire all nodes in firelist */ - FireNodeList(firelist); - - break; - default: - printf("Engine found illegal DAG status in PropagateResults()\n"); - RF_PANIC(); - break; - } -} - - - -/* - * Process a fired node which has completed - */ -static void -ProcessNode( - RF_DagNode_t * node, - int context) -{ - RF_Raid_t *raidPtr; - - raidPtr = node->dagHdr->raidPtr; - - switch (node->status) { - case rf_good: - /* normal case, don't need to do anything */ - break; - case rf_bad: - if ((node->dagHdr->numCommits > 0) || (node->dagHdr->numCommitNodes == 0)) { - node->dagHdr->status = rf_rollForward; /* crossed commit - * barrier */ - if (rf_engineDebug || 1) { - printf("raid%d: node (%s) returned fail, rolling forward\n", raidPtr->raidid, node->name); - } - } else { - node->dagHdr->status = rf_rollBackward; /* never reached commit - * barrier */ - if (rf_engineDebug || 1) { - printf("raid%d: node (%s) returned fail, rolling backward\n", raidPtr->raidid, node->name); - } - } - break; - case rf_undone: - /* normal rollBackward case, don't need to do anything */ - break; - case rf_panic: - /* an undo node failed!!! */ - printf("UNDO of a node failed!!!/n"); - break; - default: - printf("node finished execution with an illegal status!!!\n"); - RF_PANIC(); - break; - } - - /* enqueue node's succedents (antecedents if rollBackward) for - * execution */ - PropagateResults(node, context); -} - - - -/* user context or dag-exec-thread context: - * This is the first step in post-processing a newly-completed node. - * This routine is called by each node execution function to mark the node - * as complete and fire off any successors that have been enabled. - */ -int -rf_FinishNode( - RF_DagNode_t * node, - int context) -{ - /* as far as I can tell, retcode is not used -wvcii */ - int retcode = RF_FALSE; - node->dagHdr->numNodesCompleted++; - ProcessNode(node, context); - - return (retcode); -} - - -/* user context: - * submit dag for execution, return non-zero if we have to wait for completion. - * if and only if we return non-zero, we'll cause cbFunc to get invoked with - * cbArg when the DAG has completed. - * - * for now we always return 1. If the DAG does not cause any I/O, then the callback - * may get invoked before DispatchDAG returns. There's code in state 5 of ContinueRaidAccess - * to handle this. - * - * All we do here is fire the direct successors of the header node. The - * DAG execution thread does the rest of the dag processing. - */ -int -rf_DispatchDAG( - RF_DagHeader_t * dag, - void (*cbFunc) (void *), - void *cbArg) -{ - RF_Raid_t *raidPtr; - - raidPtr = dag->raidPtr; - if (dag->tracerec) { - RF_ETIMER_START(dag->tracerec->timer); - } - if (rf_engineDebug || rf_validateDAGDebug) { - if (rf_ValidateDAG(dag)) - RF_PANIC(); - } - if (rf_engineDebug) { - printf("raid%d: Entering DispatchDAG\n", raidPtr->raidid); - } - raidPtr->dags_in_flight++; /* debug only: blow off proper - * locking */ - dag->cbFunc = cbFunc; - dag->cbArg = cbArg; - dag->numNodesCompleted = 0; - dag->status = rf_enable; - FireNodeArray(dag->numSuccedents, dag->succedents); - return (1); -} -/* dedicated kernel thread: - * the thread that handles all DAG node firing. - * To minimize locking and unlocking, we grab a copy of the entire node queue and then set the - * node queue to NULL before doing any firing of nodes. This way we only have to release the - * lock once. Of course, it's probably rare that there's more than one node in the queue at - * any one time, but it sometimes happens. - * - * In the kernel, this thread runs at spl0 and is not swappable. I copied these - * characteristics from the aio_completion_thread. - */ - -static void -DAGExecutionThread(RF_ThreadArg_t arg) -{ - RF_DagNode_t *nd, *local_nq, *term_nq, *fire_nq; - RF_Raid_t *raidPtr; - int ks; - - raidPtr = (RF_Raid_t *) arg; - - if (rf_engineDebug) { - printf("raid%d: Engine thread is running\n", raidPtr->raidid); - } - - mtx_lock(&Giant); - - RF_THREADGROUP_RUNNING(&raidPtr->engine_tg); - - DO_LOCK(raidPtr); - while (!raidPtr->shutdown_engine) { - - while (raidPtr->node_queue != NULL) { - local_nq = raidPtr->node_queue; - fire_nq = NULL; - term_nq = NULL; - raidPtr->node_queue = NULL; - DO_UNLOCK(raidPtr); - - /* first, strip out the terminal nodes */ - while (local_nq) { - nd = local_nq; - local_nq = local_nq->next; - switch (nd->dagHdr->status) { - case rf_enable: - case rf_rollForward: - if (nd->numSuccedents == 0) { - /* end of the dag, add to - * callback list */ - nd->next = term_nq; - term_nq = nd; - } else { - /* not the end, add to the - * fire queue */ - nd->next = fire_nq; - fire_nq = nd; - } - break; - case rf_rollBackward: - if (nd->numAntecedents == 0) { - /* end of the dag, add to the - * callback list */ - nd->next = term_nq; - term_nq = nd; - } else { - /* not the end, add to the - * fire queue */ - nd->next = fire_nq; - fire_nq = nd; - } - break; - default: - RF_PANIC(); - break; - } - } - - /* execute callback of dags which have reached the - * terminal node */ - while (term_nq) { - nd = term_nq; - term_nq = term_nq->next; - nd->next = NULL; - (nd->dagHdr->cbFunc) (nd->dagHdr->cbArg); - raidPtr->dags_in_flight--; /* debug only */ - } - - /* fire remaining nodes */ - FireNodeList(fire_nq); - - DO_LOCK(raidPtr); - } - while (!raidPtr->shutdown_engine && raidPtr->node_queue == NULL) - DO_WAIT(raidPtr); - } - DO_UNLOCK(raidPtr); - - RF_THREADGROUP_DONE(&raidPtr->engine_tg); - - RF_THREAD_EXIT(0); -} diff --git a/sys/dev/raidframe/rf_engine.h b/sys/dev/raidframe/rf_engine.h deleted file mode 100644 index c758c05..0000000 --- a/sys/dev/raidframe/rf_engine.h +++ /dev/null @@ -1,48 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_engine.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II, Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/********************************************************** - * * - * engine.h -- header file for execution engine functions * - * * - **********************************************************/ - -#ifndef _RF__RF_ENGINE_H_ -#define _RF__RF_ENGINE_H_ - -int -rf_ConfigureEngine(RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); - -int rf_FinishNode(RF_DagNode_t * node, int context); /* return finished node - * to engine */ - -int rf_DispatchDAG(RF_DagHeader_t * dag, void (*cbFunc) (void *), void *cbArg); /* execute dag */ - -#endif /* !_RF__RF_ENGINE_H_ */ diff --git a/sys/dev/raidframe/rf_etimer.h b/sys/dev/raidframe/rf_etimer.h deleted file mode 100644 index e66e01b..0000000 --- a/sys/dev/raidframe/rf_etimer.h +++ /dev/null @@ -1,95 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_etimer.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_TIMER_H_ -#define _RF__RF_TIMER_H_ - -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_utils.h> - -#include <sys/time.h> - -struct RF_Etimer_s { - struct timeval st; - struct timeval et; - struct timeval diff; -}; - -#if defined(_KERNEL) -#include <sys/kernel.h> - -#if defined(__NetBSD__) -#define RF_ETIMER_START(_t_) \ - { \ - int s; \ - bzero(&(_t_), sizeof (_t_)); \ - s = splclock(); \ - (_t_).st = mono_time; \ - splx(s); \ - } -#elif defined(__FreeBSD__) -#define RF_ETIMER_START(_t_) \ - { \ - int s; \ - bzero(&(_t_), sizeof (_t_)); \ - s = splclock(); \ - getmicrouptime(&(_t_).st); \ - splx(s); \ - } -#endif - -#if defined(__NetBSD__) -#define RF_ETIMER_STOP(_t_) \ - { \ - int s; \ - s = splclock(); \ - (_t_).et = mono_time; \ - splx(s); \ - } -#elif defined(__FreeBSD__) -#define RF_ETIMER_STOP(_t_) \ - { \ - int s; \ - s = splclock(); \ - getmicrouptime(&(_t_).et); \ - splx(s); \ - } -#endif - -#define RF_ETIMER_EVAL(_t_) \ - { \ - RF_TIMEVAL_DIFF(&(_t_).st, &(_t_).et, &(_t_).diff) \ - } - -#define RF_ETIMER_VAL_US(_t_) (RF_TIMEVAL_TO_US((_t_).diff)) -#define RF_ETIMER_VAL_MS(_t_) (RF_TIMEVAL_TO_US((_t_).diff)/1000) - -#endif /* _KERNEL */ - -#endif /* !_RF__RF_TIMER_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd.c b/sys/dev/raidframe/rf_evenodd.c deleted file mode 100644 index 334ba0b..0000000 --- a/sys/dev/raidframe/rf_evenodd.c +++ /dev/null @@ -1,559 +0,0 @@ -/* $NetBSD: rf_evenodd.c,v 1.4 2000/01/07 03:40:59 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************************** - * - * rf_evenodd.c -- implements EVENODD array architecture - * - ****************************************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_EVENODD > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_pq.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_evenodd_dagfuncs.h> -#include <dev/raidframe/rf_evenodd_dags.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_kintf.h> - -typedef struct RF_EvenOddConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_EvenOddConfigInfo_t; - -int -rf_ConfigureEvenOdd(listp, raidPtr, cfgPtr) - RF_ShutdownList_t **listp; - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_EvenOddConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - startdisk = 0; - for (i = 0; i < raidPtr->numCol; i++) { - for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((startdisk -= 2) < 0) - startdisk += raidPtr->numCol; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 2; /* ORIG: - * layoutPtr->numDataCol - * = raidPtr->numCol-1; */ -#if RF_EO_MATRIX_DIM > 17 - if (raidPtr->numCol <= 17) { - printf("Number of stripe units in a parity stripe is smaller than 17. Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 17 to increase performance. \n"); - return (EINVAL); - } -#elif RF_EO_MATRIX_DIM == 17 - if (raidPtr->numCol > 17) { - printf("Number of stripe units in a parity stripe is bigger than 17. Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 257 for encoding and decoding functions to work. \n"); - return (EINVAL); - } -#endif - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 2; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr) -{ - return (10); -} - -void -rf_IdentifyStripeEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_EvenOddConfigInfo_t *info = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; -} -/* The layout of stripe unit on the disks are: c0 c1 c2 c3 c4 - - 0 1 2 E P - 5 E P 3 4 - P 6 7 8 E - 10 11 E P 9 - E P 12 13 14 - .... - - We use the MapSectorRAID5 to map data information because the routine can be shown to map exactly - the layout of data stripe unit as shown above although we have 2 redundant information now. - But for E and P, we use rf_MapEEvenOdd and rf_MapParityEvenOdd which are different method from raid-5. -*/ - - -void -rf_MapParityEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; - - *row = 0; - *col = (endSUIDofthisStrip + 2) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapEEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; - - *row = 0; - *col = (endSUIDofthisStrip + 1) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_EODagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - unsigned ndfail = asmap->numDataFailed; - unsigned npfail = asmap->numParityFailed + asmap->numQFailed; - unsigned ntfail = npfail + ndfail; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - if (ntfail > 2) { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } - /* ok, we can do this I/O */ - if (type == RF_IO_TYPE_READ) { - switch (ndfail) { - case 0: - /* fault free read */ - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ - break; - case 1: - /* lost a single data unit */ - /* two cases: (1) parity is not lost. do a normal raid - * 5 reconstruct read. (2) parity is lost. do a - * reconstruct read using "e". */ - if (ntfail == 2) { /* also lost redundancy */ - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateReadDAG; - } else { - /* P and E are ok. But is there a failure in - * some unaccessed data unit? */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateReadDAG; - } - break; - case 2: - /* *createFunc = rf_EO_200_CreateReadDAG; */ - *createFunc = NULL; - break; - } - return; - } - /* a write */ - switch (ntfail) { - case 0: /* fault free */ - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - - *createFunc = (RF_VoidFuncPtr) rf_EOCreateSmallWriteDAG; - } else { - *createFunc = (RF_VoidFuncPtr) rf_EOCreateLargeWriteDAG; - } - break; - - case 1: /* single disk fault */ - if (npfail == 1) { - RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like - * normal mode raid5 - * write. */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->parityInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateLargeWriteDAG; - } else {/* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->qInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateLargeWriteDAG; - } - } else { /* data missing. Do a P reconstruct write if - * only a single data unit is lost in the - * stripe, otherwise a reconstruct write which - * employnig both P and E units. */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) { - if (asmap->numStripeUnitsAccessed == 1) - *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateWriteDAG; - else - *createFunc = NULL; /* No direct support for - * this case now, like - * that in Raid-5 */ - } else { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for - * this case now, like - * that in Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateWriteDAG; - } - } - break; - - case 2: /* two disk faults */ - switch (npfail) { - case 2: /* both p and q dead */ - *createFunc = (RF_VoidFuncPtr) rf_EO_011_CreateWriteDAG; - break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* In both PQ and - * EvenOdd, no direct - * support for this case - * now, like that in - * Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateWriteDAG; - } else { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for - * this case, like that - * in Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateWriteDAG; - } - break; - case 0: /* double data loss */ - /* if(asmap->failedPDAs[0]->numSector + - * asmap->failedPDAs[1]->numSector == 2 * - * layoutPtr->sectorsPerStripeUnit ) createFunc = - * rf_EOCreateLargeWriteDAG; else */ - *createFunc = NULL; /* currently, in Evenodd, No - * support for simultaneous - * access of both failed SUs */ - break; - } - break; - - default: /* more than 2 disk faults */ - *createFunc = NULL; - RF_PANIC(); - } - return; -} - - -int -rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ - RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; - RF_AccessStripeMapHeader_t *asm_h; - RF_AccessStripeMap_t *asmap; - RF_AllocListElem_t *alloclist; - RF_PhysDiskAddr_t *pda; - char *pbuf, *buf, *end_p, *p; - char *redundantbuf2; - int redundantTwoErr = 0, redundantOneErr = 0; - int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, - parity_corrected = RF_FALSE, red2_corrected = RF_FALSE; - int i, retcode; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - RF_AccTraceEntry_t tracerec; - RF_MCPair_t *mcpair; - - retcode = RF_PARITY_OKAY; - - mcpair = rf_AllocMCPair(); - rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ - end_p = buf + bytesPerStripe; - RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); - asmap = asm_h->stripeMap; - - for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) - goto out; /* no way to verify parity if disk is - * dead. return w/ good status */ - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - RF_ASSERT(!asmap->parityInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); - RF_ASSERT(asmap->parityInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) - goto out; - blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; - - RF_ASSERT(!asmap->qInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1); - RF_ASSERT(asmap->qInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1)) - goto out; - /* if disk is dead, b/c no reconstruction is implemented right now, - * the function "rf_TryToRedirectPDA" always return one, which cause - * go to out and return w/ good status */ - blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = asmap->qInfo; - - /* fire off the DAG */ - bzero((char *) &tracerec, sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug) { - printf("Parity verify read dag:\n"); - rf_PrintDAGList(rd_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); - retcode = RF_PARITY_COULD_NOT_VERIFY; - goto out; - } - for (p = buf, i = 0; p < end_p; p += numbytes, i++) { - rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector); - /* the corresponding columes in EvenOdd encoding Matrix for - * these p pointers which point to the databuffer in a full - * stripe are sequentially from 0 to layoutPtr->numDataCol-1 */ - rf_bxor(p, pbuf, numbytes, NULL); - } - RF_ASSERT(i == layoutPtr->numDataCol); - - for (i = 0; i < numbytes; i++) { - if (pbuf[i] != buf[bytesPerStripe + i]) { - if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); - } - } - redundantOneErr = 1; - break; - } - - for (i = 0; i < numbytes; i++) { - if (redundantbuf2[i] != buf[bytesPerStripe + numbytes + i]) { - if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + numbytes + i], (u_char) redundantbuf2[i]); - } - redundantTwoErr = 1; - break; - } - } - if (redundantOneErr || redundantTwoErr) - retcode = RF_PARITY_BAD; - - /* correct the first redundant disk, ie parity if it is error */ - if (redundantOneErr && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; - wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->parityInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); - parity_cant_correct = RF_TRUE; - } else { - parity_corrected = RF_TRUE; - } - rf_FreeDAG(wr_dag_h); - } - if (redundantTwoErr && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; - wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->qInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Dag of write new second redundant information in parity verify :\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n"); - red2_cant_correct = RF_TRUE; - } else { - red2_corrected = RF_TRUE; - } - rf_FreeDAG(wr_dag_h); - } - if ((redundantOneErr && parity_cant_correct) || - (redundantTwoErr && red2_cant_correct)) - retcode = RF_PARITY_COULD_NOT_CORRECT; - if ((retcode = RF_PARITY_BAD) && parity_corrected && red2_corrected) - retcode = RF_PARITY_CORRECTED; - - -out: - rf_FreeAccessStripeMap(asm_h); - rf_FreeAllocList(alloclist); - rf_FreeDAG(rd_dag_h); - rf_FreeMCPair(mcpair); - return (retcode); -} -#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd.h b/sys/dev/raidframe/rf_evenodd.h deleted file mode 100644 index 4babdec..0000000 --- a/sys/dev/raidframe/rf_evenodd.h +++ /dev/null @@ -1,55 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_evenodd.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ -/* - * Copyright (c) 1995, 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_EVENODD_H_ -#define _RF__RF_EVENODD_H_ - -/* extern declerations of the failure mode functions. */ -int -rf_ConfigureEvenOdd(RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr); -void -rf_IdentifyStripeEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outrow); -void -rf_MapParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapEEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_EODagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); - -#endif /* !_RF__RF_EVENODD_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.c b/sys/dev/raidframe/rf_evenodd_dagfuncs.c deleted file mode 100644 index 2e39a53..0000000 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.c +++ /dev/null @@ -1,977 +0,0 @@ -/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.7 2001/01/26 03:50:53 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: ChangMing Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Code for RAID-EVENODD architecture. - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_EVENODD > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_evenodd_dagfuncs.h> - -/* These redundant functions are for small write */ -RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"}; -RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"}; -/* These redundant functions are for degraded read */ -RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"}; -RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"}; -/********************************************************************************************** - * the following encoding node functions is used in EO_000_CreateLargeWriteDAG - **********************************************************************************************/ -int -rf_RegularPEFunc(node) - RF_DagNode_t *node; -{ - rf_RegularESubroutine(node, node->results[1]); - rf_RegularXorFunc(node);/* does the wakeup here! */ -#if 1 - return (0); /* XXX This was missing... GO */ -#endif -} - - -/************************************************************************************************ - * For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to - * be used. The previous case is when write access at least sectors of full stripe unit. - * The later function is used when the write access two stripe units but with total sectors - * less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected - * areas in their stripe unit and parity write and 'E' write are both devided into two distinct - * writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5 - ************************************************************************************************/ - -/* Algorithm: - 1. Store the difference of old data and new data in the Rod buffer. - 2. then encode this buffer into the buffer which already have old 'E' information inside it, - the result can be shown to be the new 'E' information. - 3. xor the Wnd buffer into the difference buffer to recover the original old data. - Here we have another alternative: to allocate a temporary buffer for storing the difference of - old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach - take the same speed as the previous, and need more memory. -*/ -int -rf_RegularONEFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - int EpdaIndex = (node->numParams - 1) / 2 - 1; /* the parameter of node - * where you can find - * e-pda */ - int i, k, retcode = 0; - int suoffset, length; - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p; - int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */ - - RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q); - RF_ASSERT(ESUOffset == 0); - - RF_ETIMER_START(timer); - - /* Xor the Wnd buffer into Rod buffer, the difference of old data and - * new data is stored in Rod buffer */ - for (k = 0; k < EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); - retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp); - } - /* Start to encoding the buffer storing the difference of old data and - * new data into 'E' buffer */ - for (i = 0; i < EpdaIndex; i += 2) - if (node->params[i + 1].p != node->results[0]) { /* results[0] is buf ptr - * of E */ - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i + 1].p; - scol = rf_EUCol(layoutPtr, pda->raidAddress); - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - /* Recover the original old data to be used by parity encoding - * function in XorNode */ - for (k = 0; k < EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); - retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX this was missing.. GO */ -#endif -} - -int -rf_SimpleONEFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - int retcode = 0; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - int length; - RF_RowCol_t scol; - RF_Etimer_t timer; - - RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q); - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); /* this is a pda of - * writeDataNodes */ - /* bxor to buffer of readDataNodes */ - retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); - /* find out the corresponding colume in encoding matrix for - * write colume to be encoded into redundant disk 'E' */ - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = node->params[1].p; - destbuf = node->params[3].p; - /* Start encoding process */ - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - - } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ -} - - -/****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/ -void -rf_RegularESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *pda; - int i, suoffset; - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) { - RF_ASSERT(node->params[i + 1].p != ebuf); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = (char *) node->params[i + 1].p; - destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); -} - - -/******************************************************************************************* - * Used in EO_001_CreateLargeWriteDAG - ******************************************************************************************/ -int -rf_RegularEFunc(node) - RF_DagNode_t *node; -{ - rf_RegularESubroutine(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX this was missing?.. GO */ -#endif -} -/******************************************************************************************* - * This degraded function allow only two case: - * 1. when write access the full failed stripe unit, then the access can be more than - * one tripe units. - * 2. when write access only part of the failed SU, we assume accesses of more than - * one stripe unit is not allowed so that the write can be dealt with like a - * large write. - * The following function is based on these assumptions. So except in the second case, - * it looks the same as a large write encodeing function. But this is not exactly the - * normal way for doing a degraded write, since raidframe have to break cases of access - * other than the above two into smaller accesses. We may have to change - * DegrESubroutin in the future. - *******************************************************************************************/ -void -rf_DegrESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - RF_PhysDiskAddr_t *pda; - int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) { - RF_ASSERT(node->params[i + 1].p != ebuf); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = (char *) node->params[i + 1].p; - destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} - - -/************************************************************************************** - * This function is used in case where one data disk failed and both redundant disks - * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk - * failed in the stripe but not accessed at this time, then we should, instead, use - * the rf_EOWriteDoubleRecoveryFunc(). - **************************************************************************************/ -int -rf_Degraded_100_EOFunc(node) - RF_DagNode_t *node; -{ - rf_DegrESubroutine(node, node->results[1]); - rf_RecoveryXorFunc(node); /* does the wakeup here! */ -#if 1 - return (0); /* XXX this was missing... SHould these be - * void functions??? GO */ -#endif -} -/************************************************************************************** - * This function is to encode one sector in one of the data disks to the E disk. - * However, in evenodd this function can also be used as decoding function to recover - * data from dead disk in the case of parity failure and a single data failure. - **************************************************************************************/ -void -rf_e_EncOneSect( - RF_RowCol_t srcLogicCol, - char *srcSecbuf, - RF_RowCol_t destLogicCol, - char *destSecbuf, - int bytesPerSector) -{ - int S_index; /* index of the EU in the src col which need - * be Xored into all EUs in a dest sector */ - int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; - RF_RowCol_t j, indexInDest, /* row index of an encoding unit in - * the destination colume of encoding - * matrix */ - indexInSrc; /* row index of an encoding unit in the source - * colume used for recovery */ - int bytesPerEU = bytesPerSector / numRowInEncMatix; - -#if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU / sizeof(short); - short *destShortBuf, *srcShortBuf1, *srcShortBuf2; - short temp1; -#elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU / sizeof(long); - long *destLongBuf, *srcLongBuf1, *srcLongBuf2; - long temp1; -#endif - -#if RF_EO_MATRIX_DIM > 17 - RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1); - RF_ASSERT(bytesPerEU % sizeof(short) == 0); -#elif RF_EO_MATRIX_DIM == 17 - RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4); - RF_ASSERT(bytesPerEU % sizeof(long) == 0); -#endif - - S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); -#if RF_EO_MATRIX_DIM > 17 - srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU); -#elif RF_EO_MATRIX_DIM == 17 - srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU); -#endif - - for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) { - indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); - -#if RF_EO_MATRIX_DIM > 17 - destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU); - srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU); - for (j = 0; j < shortsPerEU; j++) { - temp1 = destShortBuf[j] ^ srcShortBuf1[j]; - /* note: S_index won't be at the end row for any src - * col! */ - if (indexInSrc != RF_EO_MATRIX_DIM - 1) - destShortBuf[j] = (srcShortBuf2[j]) ^ temp1; - /* if indexInSrc is at the end row, ie. - * RF_EO_MATRIX_DIM -1, then all elements are zero! */ - else - destShortBuf[j] = temp1; - } - -#elif RF_EO_MATRIX_DIM == 17 - destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU); - srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU); - for (j = 0; j < longsPerEU; j++) { - temp1 = destLongBuf[j] ^ srcLongBuf1[j]; - if (indexInSrc != RF_EO_MATRIX_DIM - 1) - destLongBuf[j] = (srcLongBuf2[j]) ^ temp1; - else - destLongBuf[j] = temp1; - } -#endif - } -} - -void -rf_e_encToBuf( - RF_Raid_t * raidPtr, - RF_RowCol_t srcLogicCol, - char *srcbuf, - RF_RowCol_t destLogicCol, - char *destbuf, - int numSector) -{ - int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - - for (i = 0; i < numSector; i++) { - rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector); - srcbuf += bytesPerSector; - destbuf += bytesPerSector; - } -} -/************************************************************************************** - * when parity die and one data die, We use second redundant information, 'E', - * to recover the data in dead disk. This function is used in the recovery node of - * for EO_110_CreateReadDAG - **************************************************************************************/ -int -rf_RecoveryEFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - RF_RowCol_t scol, /* source logical column */ - fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of - * failed SU */ - int i; - RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - for (i = 0; i < node->numParams - 2; i += 2) - if (node->params[i + 1].p != node->results[0]) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - if (i == node->numParams - 4) - scol = RF_EO_MATRIX_DIM - 2; /* the colume of - * redundant E */ - else - scol = rf_EUCol(layoutPtr, pda->raidAddress); - srcbuf = (char *) node->params[i + 1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */ -} -/************************************************************************************** - * This function is used in the case where one data and the parity have filed. - * (in EO_110_CreateWriteDAG ) - **************************************************************************************/ -int -rf_EO_DegradedWriteEFunc(RF_DagNode_t * node) -{ - rf_DegrESubroutine(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX Yet another one!! GO */ -#endif -} - - - -/************************************************************************************** - * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES - **************************************************************************************/ - -void -rf_doubleEOdecode( - RF_Raid_t * raidPtr, - char **rrdbuf, - char **dest, - RF_RowCol_t * fcol, - char *pbuf, - char *ebuf) -{ - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i, j, k, f1, f2, row; - int rrdrow, erow, count = 0; - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; -#if 0 - int pcol = (RF_EO_MATRIX_DIM) - 1; -#endif - int ecol = (RF_EO_MATRIX_DIM) - 2; - int bytesPerEU = bytesPerSector / numRowInEncMatix; - int numDataCol = layoutPtr->numDataCol; -#if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU / sizeof(short); - short *rrdbuf_current, *pbuf_current, *ebuf_current; - short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; - short *temp; - short *P; - - RF_ASSERT(bytesPerEU % sizeof(short) == 0); - RF_Malloc(P, bytesPerEU, (short *)); - RF_Malloc(temp, bytesPerEU, (short *)); -#elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU / sizeof(long); - long *rrdbuf_current, *pbuf_current, *ebuf_current; - long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; - long *temp; - long *P; - - RF_ASSERT(bytesPerEU % sizeof(long) == 0); - RF_Malloc(P, bytesPerEU, (long *)); - RF_Malloc(temp, bytesPerEU, (long *)); -#endif - RF_ASSERT(*((long *) dest[0]) == 0); - RF_ASSERT(*((long *) dest[1]) == 0); - bzero((char *) P, bytesPerEU); - bzero((char *) temp, bytesPerEU); - RF_ASSERT(*P == 0); - /* calculate the 'P' parameter, which, not parity, is the Xor of all - * elements in the last two column, ie. 'E' and 'parity' colume, see - * the Ref. paper by Blaum, et al 1993 */ - for (i = 0; i < numRowInEncMatix; i++) - for (k = 0; k < longsPerEU; k++) { -#if RF_EO_MATRIX_DIM > 17 - ebuf_current = ((short *) ebuf) + i * shortsPerEU + k; - pbuf_current = ((short *) pbuf) + i * shortsPerEU + k; -#elif RF_EO_MATRIX_DIM == 17 - ebuf_current = ((long *) ebuf) + i * longsPerEU + k; - pbuf_current = ((long *) pbuf) + i * longsPerEU + k; -#endif - P[k] ^= *ebuf_current; - P[k] ^= *pbuf_current; - } - RF_ASSERT(fcol[0] != fcol[1]); - if (fcol[0] < fcol[1]) { -#if RF_EO_MATRIX_DIM > 17 - dest_smaller = (short *) (dest[0]); - dest_larger = (short *) (dest[1]); -#elif RF_EO_MATRIX_DIM == 17 - dest_smaller = (long *) (dest[0]); - dest_larger = (long *) (dest[1]); -#endif - f1 = fcol[0]; - f2 = fcol[1]; - } else { -#if RF_EO_MATRIX_DIM > 17 - dest_smaller = (short *) (dest[1]); - dest_larger = (short *) (dest[0]); -#elif RF_EO_MATRIX_DIM == 17 - dest_smaller = (long *) (dest[1]); - dest_larger = (long *) (dest[0]); -#endif - f1 = fcol[1]; - f2 = fcol[0]; - } - row = (RF_EO_MATRIX_DIM) - 1; - while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) { -#if RF_EO_MATRIX_DIM > 17 - dest_larger_current = dest_larger + row * shortsPerEU; - dest_smaller_current = dest_smaller + row * shortsPerEU; -#elif RF_EO_MATRIX_DIM == 17 - dest_larger_current = dest_larger + row * longsPerEU; - dest_smaller_current = dest_smaller + row * longsPerEU; -#endif - /** Do the diagonal recovery. Initially, temp[k] = (failed 1), - which is the failed data in the colume which has smaller col index. **/ - /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ - for (j = 0; j < numDataCol; j++) { - if (j == f1 || j == f2) - continue; - rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM); - if (rrdrow != (RF_EO_MATRIX_DIM) - 1) { -#if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#endif - } - } - /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't - * Xor into it E(erow,m-2) = (principle diagonal) ^ (failed - * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal - * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle - * diagonal) ^ (failed 2) */ - - erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM)); - if (erow != (RF_EO_MATRIX_DIM) - 1) { -#if RF_EO_MATRIX_DIM > 17 - ebuf_current = (short *) ebuf + shortsPerEU * erow; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(ebuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - ebuf_current = (long *) ebuf + longsPerEU * erow; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(ebuf_current + k); -#endif - } - /* step 3: ^P to obtain the failed data (failed 2). P can be - * proved to be actually (principle diagonal) After this - * step, temp[k] = (failed 2), the failed data to be recovered */ -#if RF_EO_MATRIX_DIM > 17 - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= P[k]; - /* Put the data to the destination buffer */ - for (k = 0; k < shortsPerEU; k++) - dest_larger_current[k] = temp[k]; -#elif RF_EO_MATRIX_DIM == 17 - for (k = 0; k < longsPerEU; k++) - temp[k] ^= P[k]; - /* Put the data to the destination buffer */ - for (k = 0; k < longsPerEU; k++) - dest_larger_current[k] = temp[k]; -#endif - - /** THE FOLLOWING DO THE HORIZONTAL XOR **/ - /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data - * columes */ - for (j = 0; j < numDataCol; j++) { - if (j == f1 || j == f2) - continue; -#if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(rrdbuf_current + k); -#endif - } - /* step 2: ^A(row,m-1) */ - /* step 3: Put the data to the destination buffer */ -#if RF_EO_MATRIX_DIM > 17 - pbuf_current = (short *) pbuf + shortsPerEU * row; - for (k = 0; k < shortsPerEU; k++) - temp[k] ^= *(pbuf_current + k); - for (k = 0; k < shortsPerEU; k++) - dest_smaller_current[k] = temp[k]; -#elif RF_EO_MATRIX_DIM == 17 - pbuf_current = (long *) pbuf + longsPerEU * row; - for (k = 0; k < longsPerEU; k++) - temp[k] ^= *(pbuf_current + k); - for (k = 0; k < longsPerEU; k++) - dest_smaller_current[k] = temp[k]; -#endif - count++; - } - /* Check if all Encoding Unit in the data buffer have been decoded, - * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number, - * this algorithm will covered all buffer */ - RF_ASSERT(count == numRowInEncMatix); - RF_Free((char *) P, bytesPerEU); - RF_Free((char *) temp, bytesPerEU); -} - - -/*************************************************************************************** -* This function is called by double degragded read -* EO_200_CreateReadDAG -* -***************************************************************************************/ -int -rf_EvenOddDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - int ndataParam = 0; - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i, prm, sector, nresults = node->numResults; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - unsigned sosAddr; - int two = 0, mallc_one = 0, mallc_two = 0; /* flags to indicate if - * memory is allocated */ - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1, - npda; - RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol; - char **buf, *ebuf, *pbuf, *dest[2]; - long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff; - RF_SectorNum_t startSector, endSector; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ETIMER_START(timer); - - /* Find out the number of parameters which are pdas for data - * information */ - for (i = 0; i <= np; i++) - if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) { - ndataParam = i; - break; - } - RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); - if (ndataParam != 0) { - RF_Malloc(suoff, ndataParam * sizeof(long), (long *)); - RF_Malloc(suend, ndataParam * sizeof(long), (long *)); - RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *)); - } - if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { - RF_ASSERT(0); /* currently, no support for this situation */ - ppda = node->params[np - 6].p; - ppda2 = node->params[np - 5].p; - RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY); - epda = node->params[np - 4].p; - epda2 = node->params[np - 3].p; - RF_ASSERT(epda2->type == RF_PDA_TYPE_Q); - two = 1; - } else { - ppda = node->params[np - 4].p; - epda = node->params[np - 3].p; - psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector); - esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector); - RF_ASSERT(psuoff == esuoff); - } - /* - the followings have three goals: - 1. determine the startSector to begin decoding and endSector to end decoding. - 2. determine the colume numbers of the two failed disks. - 3. determine the offset and end offset of the access within each failed stripe unit. - */ - if (nresults == 1) { - /* find the startSector to begin decoding */ - pda = node->results[0]; - bzero(pda->bufPtr, bytesPerSector * pda->numSector); - fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector); - fsuend[0] = fsuoff[0] + pda->numSector; - startSector = fsuoff[0]; - endSector = fsuend[0]; - - /* find out the column of failed disk being accessed */ - fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress); - - /* find out the other failed colume not accessed */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != fcol[0]) - break; - } - RF_ASSERT(i < numDataCol); - fcol[1] = i; - } else { - RF_ASSERT(nresults == 2); - pda0 = node->results[0]; - bzero(pda0->bufPtr, bytesPerSector * pda0->numSector); - pda1 = node->results[1]; - bzero(pda1->bufPtr, bytesPerSector * pda1->numSector); - /* determine the failed colume numbers of the two failed - * disks. */ - fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress); - fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress); - /* determine the offset and end offset of the access within - * each failed stripe unit. */ - fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector); - fsuend[0] = fsuoff[0] + pda0->numSector; - fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector); - fsuend[1] = fsuoff[1] + pda1->numSector; - /* determine the startSector to begin decoding */ - startSector = RF_MIN(pda0->startSector, pda1->startSector); - /* determine the endSector to end decoding */ - endSector = RF_MAX(fsuend[0], fsuend[1]); - } - /* - assign the beginning sector and the end sector for each parameter - find out the corresponding colume # for each parameter - */ - for (prm = 0; prm < ndataParam; prm++) { - pda = node->params[prm].p; - suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector); - suend[prm] = suoff[prm] + pda->numSector; - prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress); - } - /* 'sector' is the sector for the current decoding algorithm. For each - * sector in the failed SU, find out the corresponding parameters that - * cover the current sector and that are needed for decoding of this - * sector in failed SU. 2. Find out if sector is in the shadow of any - * accessed failed SU. If not, malloc a temporary space of a sector in - * size. */ - for (sector = startSector; sector < endSector; sector++) { - if (nresults == 2) - if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1])) - continue; - for (prm = 0; prm < ndataParam; prm++) - if (suoff[prm] <= sector && sector < suend[prm]) - buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr + - rf_RaidAddressToByte(raidPtr, sector - suoff[prm]); - /* find out if sector is in the shadow of any accessed failed - * SU. If yes, assign dest[0], dest[1] to point at suitable - * position of the buffer corresponding to failed SUs. if no, - * malloc a temporary space of a sector in size for - * destination of decoding. */ - RF_ASSERT(nresults == 1 || nresults == 2); - if (nresults == 1) { - dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); - /* Always malloc temp buffer to dest[1] */ - RF_Malloc(dest[1], bytesPerSector, (char *)); - bzero(dest[1], bytesPerSector); - mallc_two = 1; - } else { - if (fsuoff[0] <= sector && sector < fsuend[0]) - dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); - else { - RF_Malloc(dest[0], bytesPerSector, (char *)); - bzero(dest[0], bytesPerSector); - mallc_one = 1; - } - if (fsuoff[1] <= sector && sector < fsuend[1]) - dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]); - else { - RF_Malloc(dest[1], bytesPerSector, (char *)); - bzero(dest[1], bytesPerSector); - mallc_two = 1; - } - RF_ASSERT(mallc_one == 0 || mallc_two == 0); - } - pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff); - ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff); - /* - * After finish finding all needed sectors, call doubleEOdecode function for decoding - * one sector to destination. - */ - rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); - /* free all allocated memory, and mark flag to indicate no - * memory is being allocated */ - if (mallc_one == 1) - RF_Free(dest[0], bytesPerSector); - if (mallc_two == 1) - RF_Free(dest[1], bytesPerSector); - mallc_one = mallc_two = 0; - } - RF_Free(buf, numDataCol * sizeof(char *)); - if (ndataParam != 0) { - RF_Free(suoff, ndataParam * sizeof(long)); - RF_Free(suend, ndataParam * sizeof(long)); - RF_Free(prmToCol, ndataParam * sizeof(long)); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) { - tracerec->q_us += RF_ETIMER_VAL_US(timer); - } - rf_GenericWakeupFunc(node, 0); -#if 1 - return (0); /* XXX is this even close!!?!?!!? GO */ -#endif -} - - -/* currently, only access of one of the two failed SU is allowed in this function. - * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into - * many accesses of single stripe unit. - */ - -int -rf_EOWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - RF_SectorNum_t sector; - RF_RowCol_t col, scol; - int prm, i, j; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - unsigned sosAddr; - unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - RF_int64 numbytes; - RF_SectorNum_t startSector, endSector; - RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda; - RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol; - char **buf; /* buf[0], buf[1], buf[2], ...etc. point to - * buffer storing data read from col0, col1, - * col2 */ - char *ebuf, *pbuf, *dest[2], *olddata[2]; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this - * case, the other failed SU - * is not being accessed */ - RF_ETIMER_START(timer); - RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); - - ppda = node->results[0];/* Instead of being buffers, node->results[0] - * and [1] are Ppda and Epda */ - epda = node->results[1]; - fpda = asmap->failedPDAs[0]; - - /* First, recovery the failed old SU using EvenOdd double decoding */ - /* determine the startSector and endSector for decoding */ - startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector); - endSector = startSector + fpda->numSector; - /* Assign buf[col] pointers to point to each non-failed colume and - * initialize the pbuf and ebuf to point at the beginning of each - * source buffers and destination buffers */ - for (prm = 0; prm < numDataCol - 2; prm++) { - pda = (RF_PhysDiskAddr_t *) node->params[prm].p; - col = rf_EUCol(layoutPtr, pda->raidAddress); - buf[col] = pda->bufPtr; - } - /* pbuf and ebuf: they will change values as double recovery decoding - * goes on */ - pbuf = ppda->bufPtr; - ebuf = epda->bufPtr; - /* find out the logical colume numbers in the encoding matrix of the - * two failed columes */ - fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress); - - /* find out the other failed colume not accessed this time */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != fcol[0]) - break; - } - RF_ASSERT(i < numDataCol); - fcol[1] = i; - /* assign temporary space to put recovered failed SU */ - numbytes = fpda->numSector * bytesPerSector; - RF_Malloc(olddata[0], numbytes, (char *)); - RF_Malloc(olddata[1], numbytes, (char *)); - dest[0] = olddata[0]; - dest[1] = olddata[1]; - bzero(olddata[0], numbytes); - bzero(olddata[1], numbytes); - /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] - * have already pointed at the beginning of each source buffers and - * destination buffers */ - for (sector = startSector, i = 0; sector < endSector; sector++, i++) { - rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); - for (j = 0; j < numDataCol; j++) - if ((j != fcol[0]) && (j != fcol[1])) - buf[j] += bytesPerSector; - dest[0] += bytesPerSector; - dest[1] += bytesPerSector; - ebuf += bytesPerSector; - pbuf += bytesPerSector; - } - /* after recovery, the buffer pointed by olddata[0] is the old failed - * data. With new writing data and this old data, use small write to - * calculate the new redundant informations */ - /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of - * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol - * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[ - * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol - * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of - * wudNodes; For current implementation, we assume the simplest case: - * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 - * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new - * data to be writen to the failed disk. We first bxor the new data - * into the old recovered data, then do the same things as small - * write. */ - - rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp); - /* do new 'E' calculation */ - /* find out the corresponding colume in encoding matrix for write - * colume to be encoded into redundant disk 'E' */ - scol = rf_EUCol(layoutPtr, fpda->raidAddress); - /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest - * buffer pointer */ - rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector); - - /* do new 'P' calculation */ - rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp); - /* Free the allocated buffer */ - RF_Free(olddata[0], numbytes); - RF_Free(olddata[1], numbytes); - RF_Free(buf, numDataCol * sizeof(char *)); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) { - tracerec->q_us += RF_ETIMER_VAL_US(timer); - } - rf_GenericWakeupFunc(node, 0); - return (0); -} -#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.h b/sys/dev/raidframe/rf_evenodd_dagfuncs.h deleted file mode 100644 index cf5028b..0000000 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.h +++ /dev/null @@ -1,79 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_evenodd_dagfuncs.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ -/* - * rf_evenodd_dagfuncs.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_EVENODD_DAGFUNCS_H_ -#define _RF__RF_EVENODD_DAGFUNCS_H_ - -extern RF_RedFuncs_t rf_EOSmallWriteEFuncs; -extern RF_RedFuncs_t rf_EOSmallWritePFuncs; -extern RF_RedFuncs_t rf_eoERecoveryFuncs; -extern RF_RedFuncs_t rf_eoPRecoveryFuncs; - -int rf_RegularPEFunc(RF_DagNode_t * node); -int rf_RegularONEFunc(RF_DagNode_t * node); -int rf_SimpleONEFunc(RF_DagNode_t * node); -void rf_RegularESubroutine(RF_DagNode_t * node, char *ebuf); -int rf_RegularEFunc(RF_DagNode_t * node); -void rf_DegrESubroutine(RF_DagNode_t * node, char *ebuf); -int rf_Degraded_100_EOFunc(RF_DagNode_t * node); -void -rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf, - RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector); -void -rf_e_encToBuf(RF_Raid_t * raidPtr, RF_RowCol_t srcLogicCol, - char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector); -int rf_RecoveryEFunc(RF_DagNode_t * node); -int rf_EO_DegradedWriteEFunc(RF_DagNode_t * node); -void -rf_doubleEOdecode(RF_Raid_t * raidPtr, char **rrdbuf, char **dest, - RF_RowCol_t * fcol, char *pbuf, char *ebuf); -int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t * node); -int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t * node); - -#define rf_EUCol(_layoutPtr_, _addr_ ) \ -( (_addr_)%( (_layoutPtr_)->dataSectorsPerStripe ) )/((_layoutPtr_)->sectorsPerStripeUnit) - -#define rf_EO_Mod( _int1_, _int2_ ) \ -( ((_int1_) < 0)? (((_int1_)+(_int2_))%(_int2_)) : (_int1_)%(_int2_) ) - -#define rf_OffsetOfNextEUBoundary(_offset_, sec_per_eu) ((_offset_)/(sec_per_eu) + 1)*(sec_per_eu) - -#define RF_EO_MATRIX_DIM 17 - -/* - * RF_EO_MATRIX_DIM should be a prime number: and "bytesPerSector" should be - * dividable by ( RF_EO_MATRIX_DIM - 1) to fully encode and utilize the space - * in a sector, this number could also be 17. Tha later case doesn't apply - * for disk array larger than 17 columns totally. - */ - -#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.c b/sys/dev/raidframe/rf_evenodd_dags.c deleted file mode 100644 index cef32c2..0000000 --- a/sys/dev/raidframe/rf_evenodd_dags.c +++ /dev/null @@ -1,191 +0,0 @@ -/* $NetBSD: rf_evenodd_dags.c,v 1.2 1999/02/05 00:06:11 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_evenodd_dags.c - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_EVENODD > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_evenodd_dags.h> -#include <dev/raidframe/rf_evenodd.h> -#include <dev/raidframe/rf_evenodd_dagfuncs.h> -#include <dev/raidframe/rf_pq.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagffwr.h> - - -/* - * Lost one data. - * Use P to reconstruct missing data. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); -} -/* - * Lost data + E. - * Use P to reconstruct missing data. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); -} -/* - * Lost data + P. - * Make E look like P, and use Eor for Xor, and we can - * use degraded read DAG. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoERecoveryFuncs); -} -/* - * Lost two data. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG) -{ - rf_EO_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); -} -/* - * Lost two data. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG) -{ - rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, (int (*) (RF_DagNode_t *)) rf_Degraded_100_EOFunc, RF_TRUE); -} -/* - * E is dead. Small write. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWritePFuncs, NULL); -} -/* - * E is dead. Large write. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_TRUE); -} -/* - * P is dead. Small write. - * Swap E + P, use single-degraded stuff. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWriteEFuncs, NULL); -} -/* - * P is dead. Large write. - * Swap E + P, use single-degraded stuff. - */ -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularEFunc, RF_FALSE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG) -{ - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { - RF_PANIC(); - } - /* swap P and E to fake out parity code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, (int (*) (RF_DagNode_t *)) rf_EO_DegradedWriteEFunc, RF_FALSE); - /* is the regular E func the right one to call? */ -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead) -{ - rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, - "Re", "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc); -} -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs); -} -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, rf_RegularPEFunc, RF_FALSE); -} -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG) -{ - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc); -} -#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.h b/sys/dev/raidframe/rf_evenodd_dags.h deleted file mode 100644 index c4218a4..0000000 --- a/sys/dev/raidframe/rf_evenodd_dags.h +++ /dev/null @@ -1,64 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_evenodd_dags.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ -/* - * rf_evenodd_dags.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Chang-Ming Wu - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_EVENODD_DAGS_H_ -#define _RF__RF_EVENODD_DAGS_H_ - -#include <dev/raidframe/rf_types.h> - -#if RF_UTILITY == 0 -#include <dev/raidframe/rf_dag.h> - -/* extern decl's of the failure mode EO functions. - * swiped from rf_pqdeg.h - */ - -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead); -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ - -#endif /* !_RF__RF_EVENODD_DAGS_H_ */ diff --git a/sys/dev/raidframe/rf_fifo.c b/sys/dev/raidframe/rf_fifo.c deleted file mode 100644 index d5ce0d0..0000000 --- a/sys/dev/raidframe/rf_fifo.c +++ /dev/null @@ -1,238 +0,0 @@ -/* $NetBSD: rf_fifo.c,v 1.5 2000/03/04 03:27:13 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************************** - * - * rf_fifo.c -- prioritized fifo queue code. - * There are only two priority levels: hi and lo. - * - * Aug 4, 1994, adapted from raidSim version (MCH) - * - ***************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_fifo.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_types.h> - -/* just malloc a header, zero it (via calloc), and return it */ -/*ARGSUSED*/ -void * -rf_FifoCreate(sectPerDisk, clList, listp) - RF_SectorCount_t sectPerDisk; - RF_AllocListElem_t *clList; - RF_ShutdownList_t **listp; -{ - RF_FifoHeader_t *q; - - RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList); - q->hq_count = q->lq_count = 0; - return ((void *) q); -} - -void -rf_FifoEnqueue(q_in, elem, priority) - void *q_in; - RF_DiskQueueData_t *elem; - int priority; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - - RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY); - - elem->next = NULL; - if (priority == RF_IO_NORMAL_PRIORITY) { - if (!q->hq_tail) { - RF_ASSERT(q->hq_count == 0 && q->hq_head == NULL); - q->hq_head = q->hq_tail = elem; - } else { - RF_ASSERT(q->hq_count != 0 && q->hq_head != NULL); - q->hq_tail->next = elem; - q->hq_tail = elem; - } - q->hq_count++; - } else { - RF_ASSERT(elem->next == NULL); - if (rf_fifoDebug) { - printf("raid%d: fifo: ENQ lopri\n", - elem->raidPtr->raidid); - } - if (!q->lq_tail) { - RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL); - q->lq_head = q->lq_tail = elem; - } else { - RF_ASSERT(q->lq_count != 0 && q->lq_head != NULL); - q->lq_tail->next = elem; - q->lq_tail = elem; - } - q->lq_count++; - } - if ((q->hq_count + q->lq_count) != elem->queue->queueLength) { - printf("Queue lengths differ!: %d %d %d\n", - q->hq_count, q->lq_count, (int) elem->queue->queueLength); - printf("%d %d %d %d\n", - (int) elem->queue->numOutstanding, - (int) elem->queue->maxOutstanding, - (int) elem->queue->row, - (int) elem->queue->col); - } - RF_ASSERT((q->hq_count + q->lq_count) == elem->queue->queueLength); -} - -RF_DiskQueueData_t * -rf_FifoDequeue(q_in) - void *q_in; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *nd; - - RF_ASSERT(q); - if (q->hq_head) { - RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL); - nd = q->hq_head; - q->hq_head = q->hq_head->next; - if (!q->hq_head) - q->hq_tail = NULL; - nd->next = NULL; - q->hq_count--; - } else - if (q->lq_head) { - RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL); - nd = q->lq_head; - q->lq_head = q->lq_head->next; - if (!q->lq_head) - q->lq_tail = NULL; - nd->next = NULL; - q->lq_count--; - if (rf_fifoDebug) { - printf("raid%d: fifo: DEQ lopri %lx\n", - nd->raidPtr->raidid, (long) nd); - } - } else { - RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL); - nd = NULL; - } - return (nd); -} - -/* Return ptr to item at head of queue. Used to examine request - * info without actually dequeueing the request. - */ -RF_DiskQueueData_t * -rf_FifoPeek(void *q_in) -{ - RF_DiskQueueData_t *headElement = NULL; - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - - RF_ASSERT(q); - if (q->hq_head) - headElement = q->hq_head; - else - if (q->lq_head) - headElement = q->lq_head; - return (headElement); -} -/* We sometimes need to promote a low priority access to a regular priority access. - * Currently, this is only used when the user wants to write a stripe which is currently - * under reconstruction. - * This routine will promote all accesses tagged with the indicated parityStripeID from - * the low priority queue to the end of the normal priority queue. - * We assume the queue is locked upon entry. - */ -int -rf_FifoPromote(q_in, parityStripeID, which_ru) - void *q_in; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; /* lp = lo-pri queue - * pointer, pt = trailer */ - int retval = 0; - - while (lp) { - - /* search for the indicated parity stripe in the low-pri queue */ - if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) { - /* printf("FifoPromote: promoting access for psid - * %ld\n",parityStripeID); */ - if (pt) - pt->next = lp->next; /* delete an entry other - * than the first */ - else - q->lq_head = lp->next; /* delete the head entry */ - - if (!q->lq_head) - q->lq_tail = NULL; /* we deleted the only - * entry */ - else - if (lp == q->lq_tail) - q->lq_tail = pt; /* we deleted the tail - * entry */ - - lp->next = NULL; - q->lq_count--; - - if (q->hq_tail) { - q->hq_tail->next = lp; - q->hq_tail = lp; - } - /* append to hi-priority queue */ - else { - q->hq_head = q->hq_tail = lp; - } - q->hq_count++; - - /* UpdateShortestSeekFinishTimeForced(lp->requestPtr, - * lp->diskState); *//* deal with this later, if ever */ - - lp = (pt) ? pt->next : q->lq_head; /* reset low-pri pointer - * and continue */ - retval++; - - } else { - pt = lp; - lp = lp->next; - } - } - - /* sanity check. delete this if you ever put more than one entry in - * the low-pri queue */ - RF_ASSERT(retval == 0 || retval == 1); - return (retval); -} diff --git a/sys/dev/raidframe/rf_fifo.h b/sys/dev/raidframe/rf_fifo.h deleted file mode 100644 index 9392f08..0000000 --- a/sys/dev/raidframe/rf_fifo.h +++ /dev/null @@ -1,62 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_fifo.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_fifo.h -- prioritized FIFO queue code. - * - * 4-9-93 Created (MCH) - */ - - -#ifndef _RF__RF_FIFO_H_ -#define _RF__RF_FIFO_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_diskqueue.h> - -typedef struct RF_FifoHeader_s { - RF_DiskQueueData_t *hq_head, *hq_tail; /* high priority requests */ - RF_DiskQueueData_t *lq_head, *lq_tail; /* low priority requests */ - int hq_count, lq_count; /* debug only */ -} RF_FifoHeader_t; - -extern void * -rf_FifoCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t * clList, RF_ShutdownList_t ** listp); -extern void -rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t * elem, - int priority); -extern RF_DiskQueueData_t *rf_FifoDequeue(void *q_in); -extern RF_DiskQueueData_t *rf_FifoPeek(void *q_in); -extern int -rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -#endif /* !_RF__RF_FIFO_H_ */ diff --git a/sys/dev/raidframe/rf_freebsdkintf.c b/sys/dev/raidframe/rf_freebsdkintf.c deleted file mode 100644 index 13f5abb..0000000 --- a/sys/dev/raidframe/rf_freebsdkintf.c +++ /dev/null @@ -1,3192 +0,0 @@ -/*- - * Copyright (c) 2002 Scott Long <scottl@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* $NetBSD: rf_netbsdkintf.c,v 1.105 2001/04/05 02:48:51 oster Exp $ */ -/*- - * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Greg Oster; Jason R. Thorpe. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1988 University of Utah. - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: Utah $Hdr: cd.c 1.6 90/11/28$ - * - * @(#)cd.c 8.2 (Berkeley) 11/16/93 - */ - - - - -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*********************************************************** - * - * rf_kintf.c -- the kernel interface routines for RAIDframe - * - ***********************************************************/ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/param.h> -#include <sys/queue.h> -#include <sys/stat.h> -#include <sys/ioccom.h> -#include <sys/filio.h> -#include <sys/filedesc.h> -#include <sys/fcntl.h> -#include <sys/systm.h> -#include <sys/namei.h> -#include <sys/vnode.h> -#include <sys/bio.h> -#include <sys/buf.h> -#include <sys/conf.h> -#include <sys/disk.h> -#include <sys/lock.h> -#include <sys/reboot.h> -#include <sys/module.h> -#include <vm/uma.h> -#include <geom/geom_disk.h> - -#include "opt_raid.h" -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raidframe.h> -#include <dev/raidframe/rf_copyback.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagflags.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_kintf.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_configure.h> - -RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) - -static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a - * spare table */ -static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from - * installation process */ - -/* prototypes */ -static void KernelWakeupFunc(struct bio *); -static void InitBP(struct bio *, struct vnode *, unsigned rw_flag, - dev_t dev, RF_SectorNum_t startSect, - RF_SectorCount_t numSect, caddr_t buf, - void (*cbFunc) (struct bio *), void *cbArg, - int logBytesPerSector, struct proc * b_proc); -static struct raid_softc *raidinit(RF_Raid_t *); -static void rf_search_label(dev_t, struct disklabel *, - RF_AutoConfig_t **) __unused; - -static int raid_modevent(module_t, int, void*); -void raidattach(void); - -disk_open_t raidopen; -disk_close_t raidclose; -disk_ioctl_t raidioctl; -disk_strategy_t raidstrategy; - -d_open_t raidctlopen; -d_close_t raidctlclose; -d_ioctl_t raidctlioctl; - -static struct cdevsw raidctl_cdevsw = { - .d_version = D_VERSION, - .d_flags = D_NEEDGIANT, - .d_open = raidctlopen, - .d_close = raidctlclose, - .d_ioctl = raidctlioctl, - .d_name = "raidctl", -}; - -/* - * Pilfered from ccd.c - */ - -struct raidbuf { - struct bio rf_buf; /* new I/O buf. MUST BE FIRST!!! */ - struct bio *rf_obp; /* ptr. to original I/O buf */ - int rf_flags; /* misc. flags */ - RF_DiskQueueData_t *req;/* the request that this was part of.. */ -}; - - -#define RAIDGETBUF(sc) uma_zalloc((sc)->sc_cbufpool, M_NOWAIT) -#define RAIDPUTBUF(sc, cbp) uma_zfree((sc)->sc_cbufpool, cbp) - -#define RF_MAX_ARRAYS 32 - -/* Raid control device */ -struct raidctl_softc { - dev_t sc_dev; /* Device node */ - int sc_flags; /* flags */ - int sc_numraid; /* Number of configured raid devices */ - struct raid_softc *sc_raiddevs[RF_MAX_ARRAYS]; -}; - -struct raid_softc { - dev_t sc_parent_dev; - int sc_flags; /* flags */ - int sc_busycount; /* How many times are we opened? */ - size_t sc_size; /* size of the raid device */ - dev_t sc_parent; /* Parent device */ - struct disk *sc_disk; /* generic disk device info */ - uma_zone_t sc_cbufpool; /* component buffer pool */ - RF_Raid_t *raidPtr; /* Raid information struct */ - struct bio_queue_head bio_queue; /* used for the device queue */ -}; -/* sc_flags */ -#define RAIDF_OPEN 0x01 /* unit has been initialized */ -#define RAIDF_WLABEL 0x02 /* label area is writable */ -#define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ -#define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ -#define RAIDF_LOCKED 0x80 /* unit is locked */ - -/* - * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. - * Be aware that large numbers can allow the driver to consume a lot of - * kernel memory, especially on writes, and in degraded mode reads. - * - * For example: with a stripe width of 64 blocks (32k) and 5 disks, - * a single 64K write will typically require 64K for the old data, - * 64K for the old parity, and 64K for the new parity, for a total - * of 192K (if the parity buffer is not re-used immediately). - * Even it if is used immedately, that's still 128K, which when multiplied - * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. - * - * Now in degraded mode, for example, a 64K read on the above setup may - * require data reconstruction, which will require *all* of the 4 remaining - * disks to participate -- 4 * 32K/disk == 128K again. - */ - -#ifndef RAIDOUTSTANDING -#define RAIDOUTSTANDING 10 -#endif - -static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disk*); -static int raidlock(struct raid_softc *); -static void raidunlock(struct raid_softc *); - -static void rf_markalldirty(RF_Raid_t *); - -static dev_t raidctl_dev; - -void rf_ReconThread(struct rf_recon_req *); -/* XXX what I want is: */ -/*void rf_ReconThread(RF_Raid_t *raidPtr); */ -void rf_RewriteParityThread(RF_Raid_t *raidPtr); -void rf_CopybackThread(RF_Raid_t *raidPtr); -void rf_ReconstructInPlaceThread(struct rf_recon_req *); -void rf_buildroothack(void *, struct raidctl_softc *); - -RF_AutoConfig_t *rf_find_raid_components(void); -RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); -static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); -static int rf_reasonable_label(RF_ComponentLabel_t *); -void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); -int rf_set_autoconfig(RF_Raid_t *, int); -int rf_set_rootpartition(RF_Raid_t *, int); -void rf_release_all_vps(RF_ConfigSet_t *); -void rf_cleanup_config_set(RF_ConfigSet_t *); -int rf_have_enough_components(RF_ConfigSet_t *); -int rf_auto_config_set(RF_ConfigSet_t *, int *, struct raidctl_softc *); -static int raidgetunit(struct raidctl_softc *, int); -static int raidshutdown(void); - -void -raidattach(void) -{ - struct raidctl_softc *parent_sc = NULL; - RF_AutoConfig_t *ac_list; /* autoconfig list */ - RF_ConfigSet_t *config_sets; - int autoconfig = 0; - - /* This is where all the initialization stuff gets done. */ - - if(rf_mutex_init(&rf_sparet_wait_mutex, __FUNCTION__)) { - rf_printf(0, "RAIDframe: failed to initialize mutexes\n"); - return; - } - - rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; - - if (rf_BootRaidframe() != 0) { - rf_printf(0, "Serious error booting RAIDframe!!\n"); - return; - } - - rf_printf(0, "Kernelized RAIDframe activated\n"); - MALLOC(parent_sc, struct raidctl_softc *, sizeof(*parent_sc), - M_RAIDFRAME, M_NOWAIT|M_ZERO); - if (parent_sc == NULL) { - RF_PANIC(); - return; - } - - parent_sc->sc_dev= make_dev(&raidctl_cdevsw, 0, UID_ROOT, GID_WHEEL, - 0600, "raidctl"); - parent_sc->sc_dev->si_drv1 = parent_sc; - raidctl_dev = parent_sc->sc_dev; - -#if RAID_AUTOCONFIG - autoconfig = 1; -#endif - - if (autoconfig) { - /* 1. locate all RAID components on the system */ - - rf_printf(0, "Searching for raid components...\n"); - ac_list = rf_find_raid_components(); - if (ac_list == NULL) - return; - - /* 2. sort them into their respective sets */ - - config_sets = rf_create_auto_sets(ac_list); - - /* 3. evaluate each set and configure the valid ones - This gets done in rf_buildroothack() */ - - /* schedule the creation of the thread to do the - "/ on RAID" stuff */ - - rf_buildroothack(config_sets, parent_sc); -#if 0 - kthread_create(rf_buildroothack,config_sets); - -#endif /* RAID_AUTOCONFIG */ - } -} - -void -rf_buildroothack(arg, parent_sc) - void *arg; - struct raidctl_softc *parent_sc; -{ - RF_ConfigSet_t *config_sets = arg; - RF_ConfigSet_t *cset; - RF_ConfigSet_t *next_cset; - int retcode; - int raidID; - int rootID; - int num_root; - - rootID = 0; - num_root = 0; - cset = config_sets; - while(cset != NULL ) { - next_cset = cset->next; - if (rf_have_enough_components(cset) && - cset->ac->clabel->autoconfigure==1) { - retcode = rf_auto_config_set(cset, &raidID, parent_sc); - if (!retcode) { - if (cset->rootable) { - rootID = raidID; - num_root++; - } - } else { - /* The autoconfig didn't work :( */ - rf_printf(1, "Autoconfig failed with code %d" - "for raid%d\n", retcode, raidID); - rf_release_all_vps(cset); - } - } else { - /* we're not autoconfiguring this set... - release the associated resources */ - rf_release_all_vps(cset); - } - /* cleanup */ - rf_cleanup_config_set(cset); - cset = next_cset; - } - if (boothowto & RB_ASKNAME) { - /* We don't auto-config... */ - } else { - /* They didn't ask, and we found something bootable... */ - -#if 0 - if (num_root == 1) { - booted_device = &raidrootdev[rootID]; - } else if (num_root > 1) { - /* we can't guess.. require the user to answer... */ - boothowto |= RB_ASKNAME; - } -#endif - } -} - -int -raidctlopen(dev_t dev, int flags, int fmt, struct thread *td) -{ - struct raidctl_softc *parent_sc; - - parent_sc = dev->si_drv1; - - if ((parent_sc->sc_flags & RAIDF_OPEN) != 0) - return (EBUSY); - - parent_sc->sc_flags |= RAIDF_OPEN; - return (0); -} - -int -raidctlclose(dev_t dev, int flags, int fmt, struct thread *td) -{ - struct raidctl_softc *parent_sc; - - parent_sc = dev->si_drv1; - - parent_sc->sc_flags &= ~RAIDF_OPEN; - return (0); -} - -int -raidctlioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td) -{ - struct raidctl_softc *parent_sc; - struct raid_softc *sc; - RF_Config_t *u_cfg, *k_cfg; - RF_Raid_t *raidPtr; - u_char *specific_buf; - u_int unit; - int retcode = 0; - - parent_sc = dev->si_drv1; - - switch (cmd) { - /* configure the system */ - case RAIDFRAME_CONFIGURE: - - /* copy-in the configuration information */ - /* data points to a pointer to the configuration structure */ - - u_cfg = *((RF_Config_t **) data); - RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); - if (k_cfg == NULL) { - return (ENOMEM); - } - retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, - sizeof(RF_Config_t)); - if (retcode) { - RF_Free(k_cfg, sizeof(RF_Config_t)); - rf_printf(2, "raidctlioctl: retcode=%d copyin.1\n", - retcode); - return (retcode); - } - /* allocate a buffer for the layout-specific data, and copy it - * in */ - if (k_cfg->layoutSpecificSize) { - if (k_cfg->layoutSpecificSize > 10000) { - /* sanity check */ - RF_Free(k_cfg, sizeof(RF_Config_t)); - return (EINVAL); - } - RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, - (u_char *)); - if (specific_buf == NULL) { - RF_Free(k_cfg, sizeof(RF_Config_t)); - return (ENOMEM); - } - retcode = copyin(k_cfg->layoutSpecific, - (caddr_t) specific_buf, - k_cfg->layoutSpecificSize); - if (retcode) { - RF_Free(specific_buf, - k_cfg->layoutSpecificSize); - RF_Free(k_cfg, sizeof(RF_Config_t)); - rf_printf(2, "raidctlioctl: retcode=%d " - "copyin.2\n", retcode); - return (retcode); - } - } else - specific_buf = NULL; - k_cfg->layoutSpecific = specific_buf; - - /* should do some kind of sanity check on the configuration. - * Store the sum of all the bytes in the last byte? */ - - /* configure the system */ - - RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *)); - if (raidPtr == NULL) { - rf_printf(0, "No memory for raid device\n"); - RF_Free(k_cfg, sizeof(RF_Config_t)); - retcode = ENOMEM; - } - bzero((char *) raidPtr, sizeof(RF_Raid_t)); - - /* Request a unit number for this soon-to-be device. */ - unit = raidgetunit(parent_sc, 0); - if (unit == -1) { - rf_printf(0, "Cannot allocate raid unit\n"); - RF_Free(raidPtr, sizeof(*raidPtr)); - goto out; - } - raidPtr->raidid = unit; - - if ((retcode = rf_Configure(raidPtr, k_cfg, NULL)) == 0) { - - /* allow this many simultaneous IO's to - this RAID device */ - raidPtr->openings = RAIDOUTSTANDING; - - parent_sc->sc_raiddevs[unit] = raidinit(raidPtr); - if (parent_sc->sc_raiddevs[unit] == NULL) { - rf_printf(0, "Could not create raid device\n"); - RF_Free(raidPtr, sizeof(*raidPtr)); - goto out; - } - parent_sc->sc_numraid++; - ((struct raid_softc *)raidPtr->sc)->sc_parent_dev = dev; - rf_markalldirty(raidPtr); - } else { - parent_sc->sc_raiddevs[unit] = NULL; - RF_Free(raidPtr, sizeof(*raidPtr)); - } - -out: - /* free the buffers. No return code here. */ - if (k_cfg->layoutSpecificSize) { - RF_Free(specific_buf, k_cfg->layoutSpecificSize); - } - RF_Free(k_cfg, sizeof(RF_Config_t)); - break; - - case RAIDFRAME_SHUTDOWN: - - unit = *(u_int *)data; - if ((unit >= RF_MAX_ARRAYS) || - (parent_sc->sc_raiddevs[unit] == NULL)) - return (EINVAL); - - sc = parent_sc->sc_raiddevs[unit]; - if ((retcode = raidlock(sc)) != 0) - return (retcode); - - /* - * If somebody has a partition mounted, we shouldn't - * shutdown. - */ - - if ((sc->sc_flags & RAIDF_OPEN) != 0) { - raidunlock(sc); - return (EBUSY); - } - - rf_printf(0, "Shutting down RAIDframe engine\n"); - retcode = rf_Shutdown(sc->raidPtr); - RF_THREADGROUP_WAIT_STOP(&sc->raidPtr->engine_tg); - - disk_destroy(sc->sc_disk); - raidunlock(sc); - - /* XXX Need to be able to destroy the zone */ - uma_zdestroy(sc->sc_cbufpool); - - parent_sc->sc_numraid--; - parent_sc->sc_raiddevs[unit] = NULL; - - RF_Free(sc->raidPtr, sizeof(*raidPtr)); - RF_Free(sc, sizeof(*sc)); - - break; - - default: - retcode = ENOIOCTL; - } - - return (retcode); -} - -/* ARGSUSED */ -int -raidopen(struct disk *dp) -{ - struct raid_softc *sc; - int error = 0; - - sc = dp->d_drv1; - - if ((error = raidlock(sc)) != 0) - return (error); - dp = sc->sc_disk; - - rf_printf(1, "Opening raid device %s%d\n", dp->d_name, dp->d_unit); - - /* Generate overall disklabel */ - raidgetdefaultlabel(sc->raidPtr, sc, dp); - - if (sc->sc_busycount == 0) { - /* First one... mark things as dirty... Note that we *MUST* - have done a configure before this. I DO NOT WANT TO BE - SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED - THAT THEY BELONG TOGETHER!!!!! */ - /* XXX should check to see if we're only open for reading - here... If so, we needn't do this, but then need some - other way of keeping track of what's happened.. */ - - rf_markalldirty( sc->raidPtr ); - sc->sc_flags |= RAIDF_OPEN; - } - - /* Prevent this unit from being unconfigured while open. */ - sc->sc_busycount++; - - raidunlock(sc); - - return (error); - - -} -/* ARGSUSED */ -int -raidclose(struct disk *dp) -{ - struct raid_softc *sc; - int error = 0; - - sc = dp->d_drv1; - - if ((error = raidlock(sc)) != 0) - return (error); - - sc->sc_busycount--; - if (sc->sc_busycount == 0) { - sc->sc_flags &= ~RAIDF_OPEN; - rf_update_component_labels(sc->raidPtr, - RF_FINAL_COMPONENT_UPDATE); - } - - raidunlock(sc); - return (0); - -} - -void -raidstrategy(bp) - struct bio *bp; -{ - RF_Raid_t *raidPtr; - struct raid_softc *sc = bp->bio_disk->d_drv1; - int s; - - raidPtr = sc->raidPtr; - if (raidPtr == NULL) { - bp->bio_error = ENODEV; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - return; - } - if (!raidPtr->valid) { - bp->bio_error = ENODEV; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - return; - } - if (bp->bio_bcount == 0) { - rf_printf(2, "b_bcount is zero..\n"); - biodone(bp); - return; - } - - s = splbio(); - - bp->bio_resid = 0; - - /* stuff it onto our queue. XXX locking? */ - bioq_insert_tail(&sc->bio_queue, bp); - - raidstart(raidPtr); - - splx(s); -} - -int -raidioctl(dp, cmd, data, flag, td) - struct disk *dp; - u_long cmd; - void *data; - int flag; - struct thread *td; -{ - struct raid_softc *sc; - RF_Raid_t *raidPtr; - RF_RaidDisk_t *diskPtr; - RF_AccTotals_t *totals; - RF_DeviceConfig_t *d_cfg, **ucfgp; - struct rf_recon_req *rrcopy, *rr; - RF_ComponentLabel_t *clabel; - RF_ComponentLabel_t *ci_label; - RF_SingleComponent_t *sparePtr,*componentPtr; - RF_SingleComponent_t *hot_spare, *component; - RF_ProgressInfo_t progressInfo; - int retcode = 0; - int row, column; - int unit; - int i, j, d; - - sc = dp->d_drv1; - raidPtr = sc->raidPtr; - - rf_printf(2, "raidioctl: %s%d %ld\n", dp->d_name, dp->d_unit, cmd); - - switch (cmd) { - - case RAIDFRAME_GET_COMPONENT_LABEL: - /* need to read the component label for the disk indicated - by row,column in clabel */ - - /* For practice, let's get it directly fromdisk, rather - than from the in-core copy */ - RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), - (RF_ComponentLabel_t *)); - if (clabel == NULL) - return (ENOMEM); - - bzero((char *) clabel, sizeof(RF_ComponentLabel_t)); - - bcopy(data, clabel, sizeof(RF_ComponentLabel_t)); - - row = clabel->row; - column = clabel->column; - - if ((row < 0) || (row >= raidPtr->numRow) || - (column < 0) || (column >= raidPtr->numCol + - raidPtr->numSpare)) { - RF_Free( clabel, sizeof(RF_ComponentLabel_t)); - return(EINVAL); - } - - raidread_component_label(raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - clabel ); - - bcopy(clabel, data, sizeof(RF_ComponentLabel_t)); - RF_Free( clabel, sizeof(RF_ComponentLabel_t)); - return (retcode); - - case RAIDFRAME_SET_COMPONENT_LABEL: - clabel = (RF_ComponentLabel_t *) data; - - /* XXX check the label for valid stuff... */ - /* Note that some things *should not* get modified -- - the user should be re-initing the labels instead of - trying to patch things. - */ - - rf_printf(1, "Got component label:\n"); - rf_printf(1, "Version: %d\n",clabel->version); - rf_printf(1, "Serial Number: %d\n",clabel->serial_number); - rf_printf(1, "Mod counter: %d\n",clabel->mod_counter); - rf_printf(1, "Row: %d\n", clabel->row); - rf_printf(1, "Column: %d\n", clabel->column); - rf_printf(1, "Num Rows: %d\n", clabel->num_rows); - rf_printf(1, "Num Columns: %d\n", clabel->num_columns); - rf_printf(1, "Clean: %d\n", clabel->clean); - rf_printf(1, "Status: %d\n", clabel->status); - - row = clabel->row; - column = clabel->column; - - if ((row < 0) || (row >= raidPtr->numRow) || - (column < 0) || (column >= raidPtr->numCol)) { - return(EINVAL); - } - - /* XXX this isn't allowed to do anything for now :-) */ - - /* XXX and before it is, we need to fill in the rest - of the fields!?!?!?! */ -#if 0 - raidwrite_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - clabel ); -#endif - return (0); - - case RAIDFRAME_INIT_LABELS: - MALLOC(ci_label, RF_ComponentLabel_t *, - sizeof(RF_ComponentLabel_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - clabel = (RF_ComponentLabel_t *) data; - /* - we only want the serial number from - the above. We get all the rest of the information - from the config that was used to create this RAID - set. - */ - - raidPtr->serial_number = clabel->serial_number; - - raid_init_component_label(raidPtr, ci_label); - ci_label->serial_number = clabel->serial_number; - - for(row=0;row<raidPtr->numRow;row++) { - ci_label->row = row; - for(column=0;column<raidPtr->numCol;column++) { - diskPtr = &raidPtr->Disks[row][column]; - if (!RF_DEAD_DISK(diskPtr->status)) { - ci_label->partitionSize = - diskPtr->partitionSize; - ci_label->column = column; - raidwrite_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - ci_label ); - } - } - } - - FREE(ci_label, M_RAIDFRAME); - return (retcode); - case RAIDFRAME_SET_AUTOCONFIG: - d = rf_set_autoconfig(raidPtr, *(int *) data); - rf_printf(1, "New autoconfig value is: %d\n", d); - *(int *) data = d; - return (retcode); - - case RAIDFRAME_SET_ROOT: - d = rf_set_rootpartition(raidPtr, *(int *) data); - rf_printf(1, "New rootpartition value is: %d\n", d); - *(int *) data = d; - return (retcode); - - /* initialize all parity */ - case RAIDFRAME_REWRITEPARITY: - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Parity for RAID 0 is trivially correct */ - raidPtr->parity_good = RF_RAID_CLEAN; - return(0); - } - - if (raidPtr->parity_rewrite_in_progress == 1) { - /* Re-write is already in progress! */ - return(EINVAL); - } - - retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, - rf_RewriteParityThread, - raidPtr,"raid_parity"); - return (retcode); - - - case RAIDFRAME_ADD_HOT_SPARE: - MALLOC(hot_spare, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - sparePtr = (RF_SingleComponent_t *) data; - memcpy( hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); - retcode = rf_add_hot_spare(raidPtr, hot_spare); - FREE(hot_spare, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_REMOVE_HOT_SPARE: - return(retcode); - - case RAIDFRAME_DELETE_COMPONENT: - MALLOC(component, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - componentPtr = (RF_SingleComponent_t *)data; - memcpy( component, componentPtr, - sizeof(RF_SingleComponent_t)); - retcode = rf_delete_component(raidPtr, component); - FREE(component, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_INCORPORATE_HOT_SPARE: - MALLOC(component, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - componentPtr = (RF_SingleComponent_t *)data; - memcpy( component, componentPtr, - sizeof(RF_SingleComponent_t)); - retcode = rf_incorporate_hot_spare(raidPtr, component); - FREE(component, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_REBUILD_IN_PLACE: - - MALLOC(component, RF_SingleComponent_t *, - sizeof(RF_SingleComponent_t), M_RAIDFRAME, - M_WAITOK | M_ZERO); - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Can't do this on a RAID 0!! */ - FREE(component, M_RAIDFRAME); - return(EINVAL); - } - - if (raidPtr->recon_in_progress == 1) { - /* a reconstruct is already in progress! */ - FREE(component, M_RAIDFRAME); - return(EINVAL); - } - - componentPtr = (RF_SingleComponent_t *) data; - memcpy( component, componentPtr, - sizeof(RF_SingleComponent_t)); - row = component->row; - column = component->column; - unit = raidPtr->raidid; - rf_printf(0, "raid%d Rebuild: %d %d\n", unit, row, column); - if ((row < 0) || (row >= raidPtr->numRow) || - (column < 0) || (column >= raidPtr->numCol)) { - FREE(component, M_RAIDFRAME); - return(EINVAL); - } - - RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); - if (rrcopy == NULL) { - FREE(component, M_RAIDFRAME); - return(ENOMEM); - } - - rrcopy->raidPtr = (void *) raidPtr; - rrcopy->row = row; - rrcopy->col = column; - - retcode = RF_CREATE_THREAD(raidPtr->recon_thread, - rf_ReconstructInPlaceThread, - rrcopy,"raid_reconip"); - FREE(component, M_RAIDFRAME); - return(retcode); - - case RAIDFRAME_GET_UNIT: - - *(int *)data = raidPtr->raidid; - return (0); - - case RAIDFRAME_GET_INFO: - if (!raidPtr->valid) - return (ENODEV); - ucfgp = (RF_DeviceConfig_t **) data; - RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), - (RF_DeviceConfig_t *)); - if (d_cfg == NULL) - return (ENOMEM); - bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t)); - d_cfg->rows = raidPtr->numRow; - d_cfg->cols = raidPtr->numCol; - d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol; - if (d_cfg->ndevs >= RF_MAX_DISKS) { - RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); - return (ENOMEM); - } - d_cfg->nspares = raidPtr->numSpare; - if (d_cfg->nspares >= RF_MAX_DISKS) { - RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); - return (ENOMEM); - } - d_cfg->maxqdepth = raidPtr->maxQueueDepth; - d = 0; - for (i = 0; i < d_cfg->rows; i++) { - for (j = 0; j < d_cfg->cols; j++) { - d_cfg->devs[d] = raidPtr->Disks[i][j]; - d++; - } - } - for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { - d_cfg->spares[i] = raidPtr->Disks[0][j]; - } - - retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); - - RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); - - return (retcode); - - case RAIDFRAME_CHECK_PARITY: - *(int *) data = raidPtr->parity_good; - return (0); - - case RAIDFRAME_RESET_ACCTOTALS: - bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals)); - return (0); - - case RAIDFRAME_GET_ACCTOTALS: - totals = (RF_AccTotals_t *) data; - *totals = raidPtr->acc_totals; - return (0); - - case RAIDFRAME_KEEP_ACCTOTALS: - raidPtr->keep_acc_totals = *(int *)data; - return (0); - - case RAIDFRAME_GET_SIZE: - *(int *) data = raidPtr->totalSectors; - return (0); - - /* fail a disk & optionally start reconstruction */ - case RAIDFRAME_FAIL_DISK: - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Can't do this on a RAID 0!! */ - return(EINVAL); - } - - rr = (struct rf_recon_req *) data; - - if (rr->row < 0 || rr->row >= raidPtr->numRow - || rr->col < 0 || rr->col >= raidPtr->numCol) - return (EINVAL); - - rf_printf(0, "%s%d: Failing the disk: row: %d col: %d\n", - dp->d_name, dp->d_unit, rr->row, rr->col); - - /* make a copy of the recon request so that we don't rely on - * the user's buffer */ - RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); - if (rrcopy == NULL) - return(ENOMEM); - bcopy(rr, rrcopy, sizeof(*rr)); - rrcopy->raidPtr = (void *) raidPtr; - - retcode = RF_CREATE_THREAD(raidPtr->recon_thread, - rf_ReconThread, - rrcopy,"raid_recon"); - return (0); - - /* invoke a copyback operation after recon on whatever disk - * needs it, if any */ - case RAIDFRAME_COPYBACK: - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0!! */ - return(EINVAL); - } - - if (raidPtr->copyback_in_progress == 1) { - /* Copyback is already in progress! */ - return(EINVAL); - } - - retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, - rf_CopybackThread, - raidPtr,"raid_copyback"); - return (retcode); - - /* return the percentage completion of reconstruction */ - case RAIDFRAME_CHECK_RECON_STATUS: - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0, so tell the - user it's done. */ - *(int *) data = 100; - return(0); - } - row = 0; /* XXX we only consider a single row... */ - if (raidPtr->status[row] != rf_rs_reconstructing) - *(int *) data = 100; - else - *(int *) data = raidPtr->reconControl[row]->percentComplete; - return (0); - case RAIDFRAME_CHECK_RECON_STATUS_EXT: - row = 0; /* XXX we only consider a single row... */ - if (raidPtr->status[row] != rf_rs_reconstructing) { - progressInfo.remaining = 0; - progressInfo.completed = 100; - progressInfo.total = 100; - } else { - progressInfo.total = - raidPtr->reconControl[row]->numRUsTotal; - progressInfo.completed = - raidPtr->reconControl[row]->numRUsComplete; - progressInfo.remaining = progressInfo.total - - progressInfo.completed; - } - bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); - return (retcode); - - case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0, so tell the - user it's done. */ - *(int *) data = 100; - return(0); - } - if (raidPtr->parity_rewrite_in_progress == 1) { - *(int *) data = 100 * - raidPtr->parity_rewrite_stripes_done / - raidPtr->Layout.numStripe; - } else { - *(int *) data = 100; - } - return (0); - - case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: - if (raidPtr->parity_rewrite_in_progress == 1) { - progressInfo.total = raidPtr->Layout.numStripe; - progressInfo.completed = - raidPtr->parity_rewrite_stripes_done; - progressInfo.remaining = progressInfo.total - - progressInfo.completed; - } else { - progressInfo.remaining = 0; - progressInfo.completed = 100; - progressInfo.total = 100; - } - bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); - return (retcode); - - case RAIDFRAME_CHECK_COPYBACK_STATUS: - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ - *(int *) data = 100; - return(0); - } - if (raidPtr->copyback_in_progress == 1) { - *(int *) data = 100 * raidPtr->copyback_stripes_done / - raidPtr->Layout.numStripe; - } else { - *(int *) data = 100; - } - return (0); - - case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: - if (raidPtr->copyback_in_progress == 1) { - progressInfo.total = raidPtr->Layout.numStripe; - progressInfo.completed = - raidPtr->copyback_stripes_done; - progressInfo.remaining = progressInfo.total - - progressInfo.completed; - } else { - progressInfo.remaining = 0; - progressInfo.completed = 100; - progressInfo.total = 100; - } - bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); - return (retcode); - - /* the sparetable daemon calls this to wait for the kernel to - * need a spare table. this ioctl does not return until a - * spare table is needed. XXX -- calling mpsleep here in the - * ioctl code is almost certainly wrong and evil. -- XXX XXX - * -- I should either compute the spare table in the kernel, - * or have a different -- XXX XXX -- interface (a different - * character device) for delivering the table -- XXX */ -#if 0 - case RAIDFRAME_SPARET_WAIT: - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - while (!rf_sparet_wait_queue) - mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); - waitreq = rf_sparet_wait_queue; - rf_sparet_wait_queue = rf_sparet_wait_queue->next; - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - /* structure assignment */ - *((RF_SparetWait_t *) data) = *waitreq; - - RF_Free(waitreq, sizeof(*waitreq)); - return (0); - - /* wakes up a process waiting on SPARET_WAIT and puts an error - * code in it that will cause the dameon to exit */ - case RAIDFRAME_ABORT_SPARET_WAIT: - RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); - waitreq->fcol = -1; - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - waitreq->next = rf_sparet_wait_queue; - rf_sparet_wait_queue = waitreq; - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - wakeup(&rf_sparet_wait_queue); - return (0); - - /* used by the spare table daemon to deliver a spare table - * into the kernel */ - case RAIDFRAME_SEND_SPARET: - - /* install the spare table */ - retcode = rf_SetSpareTable(raidPtr, *(void **) data); - - /* respond to the requestor. the return status of the spare - * table installation is passed in the "fcol" field */ - RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); - waitreq->fcol = retcode; - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - waitreq->next = rf_sparet_resp_queue; - rf_sparet_resp_queue = waitreq; - wakeup(&rf_sparet_resp_queue); - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - return (retcode); -#endif - - default: - retcode = ENOIOCTL; - break; /* fall through to the os-specific code below */ - - } - - return (retcode); - -} - - -/* raidinit -- complete the rest of the initialization for the - RAIDframe device. */ - - -static struct raid_softc * -raidinit(raidPtr) - RF_Raid_t *raidPtr; -{ - struct raid_softc *sc; - - RF_Malloc(sc, sizeof(struct raid_softc), (struct raid_softc *)); - if (sc == NULL) { - rf_printf(1, "No memory for raid device\n"); - return(NULL); - } - - sc->raidPtr = raidPtr; - - /* XXX Should check return code here */ - bioq_init(&sc->bio_queue); - sc->sc_cbufpool = uma_zcreate("raidpl", sizeof(struct raidbuf), NULL, - NULL, NULL, NULL, 0, 0); - - /* XXX There may be a weird interaction here between this, and - * protectedSectors, as used in RAIDframe. */ - - sc->sc_size = raidPtr->totalSectors; - - /* Create the disk device */ - sc->sc_disk = disk_alloc(); - sc->sc_disk->d_open = raidopen; - sc->sc_disk->d_close = raidclose; - sc->sc_disk->d_ioctl = raidioctl; - sc->sc_disk->d_strategy = raidstrategy; - sc->sc_disk->d_drv1 = sc; - sc->sc_disk->d_maxsize = DFLTPHYS; - sc->sc_disk->d_name = "raid"; - sc->sc_disk->d_unit = raidPtr->raidid; - sc->sc_disk->d_flags = DISKFLAG_NEEDSGIANT; - disk_create(sc->sc_disk, DISK_VERSION); - raidPtr->sc = sc; - - return (sc); -} - -/* wake up the daemon & tell it to get us a spare table - * XXX - * the entries in the queues should be tagged with the raidPtr - * so that in the extremely rare case that two recons happen at once, - * we know for which device were requesting a spare table - * XXX - * - * XXX This code is not currently used. GO - */ -int -rf_GetSpareTableFromDaemon(req) - RF_SparetWait_t *req; -{ - int retcode; - - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - req->next = rf_sparet_wait_queue; - rf_sparet_wait_queue = req; - wakeup(&rf_sparet_wait_queue); - - /* mpsleep unlocks the mutex */ - while (!rf_sparet_resp_queue) { - tsleep(&rf_sparet_resp_queue, PRIBIO, - "raidframe getsparetable", 0); - } - req = rf_sparet_resp_queue; - rf_sparet_resp_queue = req->next; - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - retcode = req->fcol; - RF_Free(req, sizeof(*req)); /* this is not the same req as we - * alloc'd */ - return (retcode); -} - -/* a wrapper around rf_DoAccess that extracts appropriate info from the - * bp & passes it down. - * any calls originating in the kernel must use non-blocking I/O - * do some extra sanity checking to return "appropriate" error values for - * certain conditions (to make some standard utilities work) - * - * Formerly known as: rf_DoAccessKernel - */ -void -raidstart(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_SectorCount_t num_blocks, pb, sum; - RF_RaidAddr_t raid_addr; - struct raid_softc *sc; - struct bio *bp; - daddr_t blocknum; - int unit, retcode, do_async; - - unit = raidPtr->raidid; - sc = raidPtr->sc; - - /* quick check to see if anything has died recently */ - RF_LOCK_MUTEX(raidPtr->mutex); - if (raidPtr->numNewFailures > 0) { - raidPtr->numNewFailures--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - rf_update_component_labels(raidPtr, - RF_NORMAL_COMPONENT_UPDATE); - } else - RF_UNLOCK_MUTEX(raidPtr->mutex); - - /* Check to see if we're at the limit... */ - RF_LOCK_MUTEX(raidPtr->mutex); - while (raidPtr->openings > 0) { - RF_UNLOCK_MUTEX(raidPtr->mutex); - - /* get the next item, if any, from the queue */ - if ((bp = bioq_first(&sc->bio_queue)) == NULL) { - /* nothing more to do */ - return; - } - bioq_remove(&sc->bio_queue, bp); - - /* Ok, for the bp we have here, bp->b_blkno is relative to the - * partition.. Need to make it absolute to the underlying - * device.. */ - - blocknum = bp->bio_pblkno = - bp->bio_offset >> raidPtr->logBytesPerSector; - - rf_printf(3, "Blocks: %ld, %ld\n", (long)bp->bio_pblkno, (long)blocknum); - - rf_printf(3, "bp->bio_bcount = %d\n", (int) bp->bio_bcount); - rf_printf(3, "bp->bio_resid = %d\n", (int) bp->bio_resid); - - /* *THIS* is where we adjust what block we're going to... - * but DO NOT TOUCH bp->bio_pblkno!!! */ - raid_addr = blocknum; - - num_blocks = bp->bio_bcount >> raidPtr->logBytesPerSector; - pb = (bp->bio_bcount & raidPtr->sectorMask) ? 1 : 0; - sum = raid_addr + num_blocks + pb; - if (rf_debugKernelAccess) { - rf_printf(0, "raid_addr=0x%x sum=%d num_blocks=%d(+%d) " - "(%d)\n", (int)raid_addr, (int)sum, - (int)num_blocks, (int)pb, - (int)bp->bio_resid); - } - if ((sum > raidPtr->totalSectors) || (sum < raid_addr) - || (sum < num_blocks) || (sum < pb)) { - bp->bio_error = ENOSPC; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - RF_LOCK_MUTEX(raidPtr->mutex); - continue; - } - /* - * XXX rf_DoAccess() should do this, not just DoAccessKernel() - */ - - if (bp->bio_bcount & raidPtr->sectorMask) { - bp->bio_error = EINVAL; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); - RF_LOCK_MUTEX(raidPtr->mutex); - continue; - - } - rf_printf(3, "Calling DoAccess..\n"); - - - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->openings--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - - /* - * Everything is async. - */ - do_async = 1; - - /* XXX we're still at splbio() here... do we *really* - need to be? */ - - /* don't ever condition on bp->bio_cmd & BIO_WRITE. - * always condition on BIO_READ instead */ - - retcode = rf_DoAccess(raidPtr, (bp->bio_cmd & BIO_READ) ? - RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, - do_async, raid_addr, num_blocks, - bp->bio_data, bp, NULL, NULL, - RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); - - - RF_LOCK_MUTEX(raidPtr->mutex); - } - RF_UNLOCK_MUTEX(raidPtr->mutex); -} - - - - -/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ - -int -rf_DispatchKernelIO(queue, req) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; -{ - int op = (req->type == RF_IO_TYPE_READ) ? BIO_READ : BIO_WRITE; - struct bio *bp; - struct raidbuf *raidbp = NULL; - struct raid_softc *sc; - - /* XXX along with the vnode, we also need the softc associated with - * this device.. */ - - req->queue = queue; - - sc = queue->raidPtr->sc; - - rf_printf(3, "DispatchKernelIO %s\n", sc->sc_disk->d_name); - - bp = req->bp; -#if 1 - /* XXX when there is a physical disk failure, someone is passing us a - * buffer that contains old stuff!! Attempt to deal with this problem - * without taking a performance hit... (not sure where the real bug - * is. It's buried in RAIDframe somewhere) :-( GO ) */ - - if (bp->bio_flags & BIO_ERROR) { - bp->bio_flags &= ~BIO_ERROR; - } - if (bp->bio_error != 0) { - bp->bio_error = 0; - } -#endif - raidbp = RAIDGETBUF(sc); - - raidbp->rf_flags = 0; /* XXX not really used anywhere... */ - - /* - * context for raidiodone - */ - raidbp->rf_obp = bp; - raidbp->req = req; - -#if 0 /* XXX */ - LIST_INIT(&raidbp->rf_buf.b_dep); -#endif - - switch (req->type) { - case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ - /* XXX need to do something extra here.. */ - /* I'm leaving this in, as I've never actually seen it used, - * and I'd like folks to report it... GO */ - rf_printf(2, "WAKEUP CALLED\n"); - queue->numOutstanding++; - - /* XXX need to glue the original buffer into this? */ - - KernelWakeupFunc(&raidbp->rf_buf); - break; - - case RF_IO_TYPE_READ: - case RF_IO_TYPE_WRITE: - - if (req->tracerec) { - RF_ETIMER_START(req->tracerec->timer); - } - InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, - op | bp->bio_cmd, queue->rf_cinfo->ci_dev, - req->sectorOffset, req->numSector, - req->buf, KernelWakeupFunc, (void *) req, - queue->raidPtr->logBytesPerSector, req->b_proc); - - if (rf_debugKernelAccess) { - rf_printf(0, "dispatch: bp->bio_pblkno = %ld\n", - (long) bp->bio_pblkno); - } - queue->numOutstanding++; - queue->last_deq_sector = req->sectorOffset; - /* acc wouldn't have been let in if there were any pending - * reqs at any other priority */ - queue->curPriority = req->priority; - - rf_printf(3, "Going for %c to %s%d row %d col %d\n", - req->type, sc->sc_disk->d_name, - sc->sc_disk->d_unit, queue->row, queue->col); - rf_printf(3, "sector %d count %d (%d bytes) %d\n", - (int) req->sectorOffset, (int) req->numSector, - (int) (req->numSector << - queue->raidPtr->logBytesPerSector), - (int) queue->raidPtr->logBytesPerSector); -#if 0 /* XXX */ - if ((raidbp->rf_buf.bio_cmd & BIO_READ) == 0) { - raidbp->rf_buf.b_vp->v_numoutput++; - } -#endif - (*devsw(raidbp->rf_buf.bio_dev)->d_strategy)(&raidbp->rf_buf); - - break; - - default: - panic("bad req->type in rf_DispatchKernelIO"); - } - rf_printf(3, "Exiting from DispatchKernelIO\n"); - /* splx(s); */ /* want to test this */ - return (0); -} -/* This is the callback function associated with an I/O invoked from - kernel code. - */ -static void -KernelWakeupFunc(vbp) - struct bio *vbp; -{ - RF_DiskQueueData_t *req = NULL; - RF_DiskQueue_t *queue; - struct raidbuf *raidbp = (struct raidbuf *) vbp; - struct bio *bp; - struct raid_softc *sc; - int s; - - s = splbio(); - rf_printf(2, "recovering the request queue:\n"); - req = raidbp->req; - - bp = raidbp->rf_obp; - queue = (RF_DiskQueue_t *) req->queue; - sc = queue->raidPtr->sc; - - if (raidbp->rf_buf.bio_flags & BIO_ERROR) { - bp->bio_flags |= BIO_ERROR; - bp->bio_error = raidbp->rf_buf.bio_error ? - raidbp->rf_buf.bio_error : EIO; - } - - /* XXX methinks this could be wrong... */ -#if 1 - bp->bio_resid = raidbp->rf_buf.bio_resid; -#endif - - if (req->tracerec) { - RF_ETIMER_STOP(req->tracerec->timer); - RF_ETIMER_EVAL(req->tracerec->timer); - RF_LOCK_MUTEX(rf_tracing_mutex); - req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); - req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); - req->tracerec->num_phys_ios++; - RF_UNLOCK_MUTEX(rf_tracing_mutex); - } - bp->bio_bcount = raidbp->rf_buf.bio_bcount; /* XXXX ? */ - - /* XXX Ok, let's get aggressive... If BIO_ERROR is set, let's go - * ballistic, and mark the component as hosed... */ - - if (bp->bio_flags & BIO_ERROR) { - /* Mark the disk as dead */ - /* but only mark it once... */ - if (queue->raidPtr->Disks[queue->row][queue->col].status == - rf_ds_optimal) { - rf_printf(0, "%s%d: IO Error. Marking %s as " - "failed.\n", sc->sc_disk->d_name, sc->sc_disk->d_unit, - queue->raidPtr->Disks[queue->row][queue->col].devname); - queue->raidPtr->Disks[queue->row][queue->col].status = - rf_ds_failed; - queue->raidPtr->status[queue->row] = rf_rs_degraded; - queue->raidPtr->numFailures++; - queue->raidPtr->numNewFailures++; - } else { /* Disk is already dead... */ - /* printf("Disk already marked as dead!\n"); */ - } - - } - - RAIDPUTBUF(sc, raidbp); - - rf_DiskIOComplete(queue, req, (bp->bio_flags & BIO_ERROR) ? 1 : 0); - (req->CompleteFunc)(req->argument, (bp->bio_flags & BIO_ERROR) ? 1 : 0); - - splx(s); -} - - - -/* - * initialize a buf structure for doing an I/O in the kernel. - */ -static void -InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, - logBytesPerSector, b_proc) - struct bio *bp; - struct vnode *b_vp; - unsigned rw_flag; - dev_t dev; - RF_SectorNum_t startSect; - RF_SectorCount_t numSect; - caddr_t buf; - void (*cbFunc) (struct bio *); - void *cbArg; - int logBytesPerSector; - struct proc *b_proc; -{ - bp->bio_cmd = rw_flag; - bp->bio_bcount = numSect << logBytesPerSector; -#if 0 /* XXX */ - bp->bio_bufsize = bp->bio_bcount; -#endif - bp->bio_error = 0; - bp->bio_dev = dev; - bp->bio_data = buf; - bp->bio_resid = bp->bio_bcount; /* XXX is this right!?!?!! */ - bp->bio_offset = startSect << logBytesPerSector; - if (bp->bio_bcount == 0) { - panic("bp->bio_bcount is zero in InitBP!!\n"); - } -/* - bp->b_proc = b_proc; - bp->b_vp = b_vp; -*/ - bp->bio_done = cbFunc; - -} - -static void -raidgetdefaultlabel(raidPtr, sc, dp) - RF_Raid_t *raidPtr; - struct raid_softc *sc; - struct disk *dp; -{ - rf_printf(1, "Building a default label...\n"); - if (dp == NULL) - panic("raidgetdefaultlabel(): dp is NULL\n"); - - /* fabricate a label... */ - dp->d_mediasize = raidPtr->totalSectors * raidPtr->bytesPerSector; - dp->d_sectorsize = raidPtr->bytesPerSector; - dp->d_fwsectors = raidPtr->Layout.dataSectorsPerStripe; - dp->d_fwheads = 4 * raidPtr->numCol; - -} -/* - * Lookup the provided name in the filesystem. If the file exists, - * is a valid block device, and isn't being used by anyone else, - * set *vpp to the file's vnode. - * You'll find the original of this in ccd.c - */ -int -raidlookup(path, td, vpp) - char *path; - struct thread *td; - struct vnode **vpp; /* result */ -{ - struct nameidata *nd; - struct vnode *vp; - struct vattr *va; - struct proc *p; - int error = 0, flags; - - MALLOC(nd, struct nameidata *, sizeof(struct nameidata), M_TEMP, M_NOWAIT | M_ZERO); - MALLOC(va, struct vattr *, sizeof(struct vattr), M_TEMP, M_NOWAIT | M_ZERO); - if ((nd == NULL) || (va == NULL)) { - printf("Out of memory?\n"); - return (ENOMEM); - } - - /* Sanity check the p_fd fields. This is really just a hack */ - p = td->td_proc; - if (!p->p_fd->fd_rdir || !p->p_fd->fd_cdir) - printf("Warning: p_fd fields not set\n"); - - if (!td->td_proc->p_fd->fd_rdir) - p->p_fd->fd_rdir = rootvnode; - - if (!p->p_fd->fd_cdir) - p->p_fd->fd_cdir = rootvnode; - - NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, curthread); - flags = FREAD | FWRITE; - if ((error = vn_open(nd, &flags, 0, -1)) != 0) { - rf_printf(2, "RAIDframe: vn_open returned %d\n", error); - goto end1; - } - vp = nd->ni_vp; - if (vp->v_usecount > 1) { - rf_printf(1, "raidlookup() vp->v_usecount= %d\n", vp->v_usecount); - error = EBUSY; - goto end; - } - if ((error = VOP_GETATTR(vp, va, td->td_ucred, td)) != 0) { - rf_printf(1, "raidlookup() VOP_GETATTR returned %d", error); - goto end; - } - /* XXX: eventually we should handle VREG, too. */ - if (va->va_type != VCHR) { - rf_printf(1, "Returning ENOTBLK\n"); - error = ENOTBLK; - } - *vpp = vp; - -end: - VOP_UNLOCK(vp, 0, td); - NDFREE(nd, NDF_ONLY_PNBUF); -end1: - FREE(nd, M_TEMP); - FREE(va, M_TEMP); - return (error); -} -/* - * Wait interruptibly for an exclusive lock. - * - * XXX - * Several drivers do this; it should be abstracted and made MP-safe. - * (Hmm... where have we seen this warning before :-> GO ) - */ -static int -raidlock(sc) - struct raid_softc *sc; -{ - int error; - - while ((sc->sc_flags & RAIDF_LOCKED) != 0) { - sc->sc_flags |= RAIDF_WANTED; - if ((error = - tsleep(sc, PRIBIO | PCATCH, "raidlck", 0)) != 0) - return (error); - } - sc->sc_flags |= RAIDF_LOCKED; - return (0); -} -/* - * Unlock and wake up any waiters. - */ -static void -raidunlock(sc) - struct raid_softc *sc; -{ - - sc->sc_flags &= ~RAIDF_LOCKED; - if ((sc->sc_flags & RAIDF_WANTED) != 0) { - sc->sc_flags &= ~RAIDF_WANTED; - wakeup(sc); - } -} - - -#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ -#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ - -int -raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) -{ - RF_ComponentLabel_t *clabel; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - if (clabel == NULL) { - printf("raidmarkclean: Out of memory?\n"); - return (ENOMEM); - } - - raidread_component_label(dev, b_vp, clabel); - clabel->mod_counter = mod_counter; - clabel->clean = RF_RAID_CLEAN; - raidwrite_component_label(dev, b_vp, clabel); - FREE(clabel, M_RAIDFRAME); - return(0); -} - - -int -raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) -{ - RF_ComponentLabel_t *clabel; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - if (clabel == NULL) { - printf("raidmarkclean: Out of memory?\n"); - return (ENOMEM); - } - - raidread_component_label(dev, b_vp, clabel); - clabel->mod_counter = mod_counter; - clabel->clean = RF_RAID_DIRTY; - raidwrite_component_label(dev, b_vp, clabel); - FREE(clabel, M_RAIDFRAME); - return(0); -} - -/* ARGSUSED */ -int -raidread_component_label(dev, b_vp, clabel) - dev_t dev; - struct vnode *b_vp; - RF_ComponentLabel_t *clabel; -{ - struct buf *bp; - int error; - - /* XXX should probably ensure that we don't try to do this if - someone has changed rf_protected_sectors. */ - - if (b_vp == NULL) { - /* For whatever reason, this component is not valid. - Don't try to read a component label from it. */ - return(EINVAL); - } - - /* get a block of the appropriate size... */ - bp = geteblk((int)RF_COMPONENT_INFO_SIZE); - bp->b_dev = dev; - - /* get our ducks in a row for the read */ - bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; - bp->b_iooffset = RF_COMPONENT_INFO_OFFSET; - bp->b_bcount = RF_COMPONENT_INFO_SIZE; - bp->b_iocmd = BIO_READ; - bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; - - DEV_STRATEGY(bp); - error = bufwait(bp); - - if (!error) { - memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t)); -#if 0 - rf_print_component_label( clabel ); -#endif - } else { -#if 0 - rf_printf(0, "Failed to read RAID component label!\n"); -#endif - } - - bp->b_flags |= B_INVAL | B_AGE; - brelse(bp); - return(error); -} -/* ARGSUSED */ -int -raidwrite_component_label(dev, b_vp, clabel) - dev_t dev; - struct vnode *b_vp; - RF_ComponentLabel_t *clabel; -{ - struct buf *bp; - int error; - - /* get a block of the appropriate size... */ - bp = geteblk((int)RF_COMPONENT_INFO_SIZE); - bp->b_dev = dev; - - /* get our ducks in a row for the write */ - bp->b_flags = 0; - bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; - bp->b_iooffset = RF_COMPONENT_INFO_OFFSET; - bp->b_bcount = RF_COMPONENT_INFO_SIZE; - bp->b_iocmd = BIO_WRITE; - bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; - - memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); - - memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); - - DEV_STRATEGY(bp); - error = bufwait(bp); - - bp->b_flags |= B_INVAL | B_AGE; - brelse(bp); - if (error) { -#if 1 - rf_printf(0, "Failed to write RAID component info!\n"); - rf_printf(0, "b_error= %d\n", bp->b_error); -#endif - } - - return(error); -} - -void -rf_markalldirty(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ComponentLabel_t *clabel; - int r,c; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - - if (clabel == NULL) { - printf("rf_markalldirty: Out of memory?\n"); - return; - } - - raidPtr->mod_counter++; - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - /* we don't want to touch (at all) a disk that has - failed */ - if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { - raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); - if (clabel->status == rf_ds_spared) { - /* XXX do something special... - but whatever you do, don't - try to access it!! */ - } else { -#if 0 - clabel->status = - raidPtr->Disks[r][c].status; - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); -#endif - raidmarkdirty( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); - } - } - } - } - /* printf("Component labels marked dirty.\n"); */ -#if 0 - for( c = 0; c < raidPtr->numSpare ; c++) { - sparecol = raidPtr->numCol + c; - if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { - /* - - XXX this is where we get fancy and map this spare - into it's correct spot in the array. - - */ - /* - - we claim this disk is "optimal" if it's - rf_ds_used_spare, as that means it should be - directly substitutable for the disk it replaced. - We note that too... - - */ - - for(i=0;i<raidPtr->numRow;i++) { - for(j=0;j<raidPtr->numCol;j++) { - if ((raidPtr->Disks[i][j].spareRow == - r) && - (raidPtr->Disks[i][j].spareCol == - sparecol)) { - srow = r; - scol = sparecol; - break; - } - } - } - - raidread_component_label( - raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp, - &clabel); - /* make sure status is noted */ - clabel.version = RF_COMPONENT_LABEL_VERSION; - clabel.mod_counter = raidPtr->mod_counter; - clabel.serial_number = raidPtr->serial_number; - clabel.row = srow; - clabel.column = scol; - clabel.num_rows = raidPtr->numRow; - clabel.num_columns = raidPtr->numCol; - clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ - clabel.status = rf_ds_optimal; - raidwrite_component_label( - raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp, - &clabel); - raidmarkclean( raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp); - } - } - -#endif - FREE(clabel, M_RAIDFRAME); -} - - -void -rf_update_component_labels(raidPtr, final) - RF_Raid_t *raidPtr; - int final; -{ - RF_ComponentLabel_t *clabel; - int sparecol; - int r,c; - int i,j; - int srow, scol; - - srow = -1; - scol = -1; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT | M_ZERO); - if (clabel == NULL) { - printf("rf_update_component_labels: Out of memory?\n"); - return; - } - - /* XXX should do extra checks to make sure things really are clean, - rather than blindly setting the clean bit... */ - - raidPtr->mod_counter++; - - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - if (raidPtr->Disks[r][c].status == rf_ds_optimal) { - raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); - /* make sure status is noted */ - clabel->status = rf_ds_optimal; - /* bump the counter */ - clabel->mod_counter = raidPtr->mod_counter; - - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - clabel); - if (final == RF_FINAL_COMPONENT_UPDATE) { - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); - } - } - } - /* else we don't touch it.. */ - } - } - - for( c = 0; c < raidPtr->numSpare ; c++) { - sparecol = raidPtr->numCol + c; - if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { - /* - - we claim this disk is "optimal" if it's - rf_ds_used_spare, as that means it should be - directly substitutable for the disk it replaced. - We note that too... - - */ - - for(i=0;i<raidPtr->numRow;i++) { - for(j=0;j<raidPtr->numCol;j++) { - if ((raidPtr->Disks[i][j].spareRow == - 0) && - (raidPtr->Disks[i][j].spareCol == - sparecol)) { - srow = i; - scol = j; - break; - } - } - } - - /* XXX shouldn't *really* need this... */ - raidread_component_label( - raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - clabel); - /* make sure status is noted */ - - raid_init_component_label(raidPtr, clabel); - - clabel->mod_counter = raidPtr->mod_counter; - clabel->row = srow; - clabel->column = scol; - clabel->status = rf_ds_optimal; - - raidwrite_component_label( - raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - clabel); - if (final == RF_FINAL_COMPONENT_UPDATE) { - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - raidPtr->mod_counter); - } - } - } - } - FREE(clabel, M_RAIDFRAME); - rf_printf(1, "Component labels updated\n"); -} - -void -rf_close_component(raidPtr, vp, auto_configured) - RF_Raid_t *raidPtr; - struct vnode *vp; - int auto_configured; -{ - struct thread *td; - - td = raidPtr->engine_thread; - - if (vp != NULL) { - if (auto_configured == 1) { - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - - vrele(vp); - } else { - vn_close(vp, FREAD | FWRITE, td->td_ucred, td); - } - } else { - rf_printf(1, "vnode was NULL\n"); - } -} - - -void -rf_UnconfigureVnodes(raidPtr) - RF_Raid_t *raidPtr; -{ - int r,c; - struct thread *td; - struct vnode *vp; - int acd; - - - /* We take this opportunity to close the vnodes like we should.. */ - - td = raidPtr->engine_thread; - - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - rf_printf(1, "Closing vnode for row: %d col: %d\n", r, c); - vp = raidPtr->raid_cinfo[r][c].ci_vp; - acd = raidPtr->Disks[r][c].auto_configured; - rf_close_component(raidPtr, vp, acd); - raidPtr->raid_cinfo[r][c].ci_vp = NULL; - raidPtr->Disks[r][c].auto_configured = 0; - } - } - for (r = 0; r < raidPtr->numSpare; r++) { - rf_printf(1, "Closing vnode for spare: %d\n", r); - vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; - acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; - rf_close_component(raidPtr, vp, acd); - raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; - raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; - } -} - - -void -rf_ReconThread(req) - struct rf_recon_req *req; -{ - RF_Raid_t *raidPtr; - - mtx_lock(&Giant); - raidPtr = (RF_Raid_t *) req->raidPtr; - raidPtr->recon_in_progress = 1; - - rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, - ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); - - /* XXX get rid of this! we don't need it at all.. */ - RF_Free(req, sizeof(*req)); - - raidPtr->recon_in_progress = 0; - - /* That's all... */ - RF_THREAD_EXIT(0); /* does not return */ -} - -void -rf_RewriteParityThread(raidPtr) - RF_Raid_t *raidPtr; -{ - int retcode; - - mtx_lock(&Giant); - raidPtr->parity_rewrite_in_progress = 1; - retcode = rf_RewriteParity(raidPtr); - if (retcode) { - rf_printf(0, "raid%d: Error re-writing parity!\n",raidPtr->raidid); - } else { - /* set the clean bit! If we shutdown correctly, - the clean bit on each component label will get - set */ - raidPtr->parity_good = RF_RAID_CLEAN; - } - raidPtr->parity_rewrite_in_progress = 0; - - /* Anyone waiting for us to stop? If so, inform them... */ - if (raidPtr->waitShutdown) { - wakeup(&raidPtr->parity_rewrite_in_progress); - } - - /* That's all... */ - RF_THREAD_EXIT(0); /* does not return */ -} - - -void -rf_CopybackThread(raidPtr) - RF_Raid_t *raidPtr; -{ - mtx_lock(&Giant); - raidPtr->copyback_in_progress = 1; - rf_CopybackReconstructedData(raidPtr); - raidPtr->copyback_in_progress = 0; - - /* That's all... */ - RF_THREAD_EXIT(0); /* does not return */ -} - - -void -rf_ReconstructInPlaceThread(req) - struct rf_recon_req *req; -{ - int retcode; - RF_Raid_t *raidPtr; - - mtx_lock(&Giant); - raidPtr = req->raidPtr; - raidPtr->recon_in_progress = 1; - retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col); - RF_Free(req, sizeof(*req)); - raidPtr->recon_in_progress = 0; - - /* That's all... */ - RF_THREAD_EXIT(0); /* does not return */ -} - -RF_AutoConfig_t * -rf_find_raid_components() -{ - RF_AutoConfig_t *ac_list = NULL; -#if 0 /* XXX GEOM */ - struct vnode *vp; - struct disklabel *label; - struct diskslice *slice; - struct diskslices *slices; - struct disk *disk; - struct thread *td; - dev_t dev; - char *devname; - int error, j; - int nslices; - - td = curthread; - - MALLOC(label, struct disklabel *, sizeof(struct disklabel), - M_RAIDFRAME, M_NOWAIT|M_ZERO); - MALLOC(slices, struct diskslices *, sizeof(struct diskslices), - M_RAIDFRAME, M_NOWAIT|M_ZERO); - if ((label == NULL) || (slices == NULL)) { - printf("rf_find_raid_components: Out of Memory?\n"); - return (NULL); - } - - /* initialize the AutoConfig list */ - ac_list = NULL; - - /* we begin by trolling through *all* the disk devices on the system */ - - disk = NULL; - while ((disk = disk_enumerate(disk))) { - - /* we don't care about floppies... */ - devname = disk->d_dev->si_name; - if (!strncmp(devname, "fd", 2) || - !strncmp(devname, "cd", 2) || - !strncmp(devname, "acd", 3)) - continue; - - rf_printf(1, "Examining %s\n", disk->d_dev->si_name); - if (bdevvp(disk->d_dev, &vp)) - panic("RAIDframe can't alloc vnode"); - vref(vp); - - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); - VOP_UNLOCK(vp, 0, td); - if (error) { - vput(vp); - continue; - } - - error = VOP_IOCTL(vp, DIOCGSLICEINFO, (caddr_t)slices, - FREAD, td->td_ucred, td); - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - vrele(vp); - if (error) { - /* No slice table. */ - continue; - } - - nslices = slices->dss_nslices; - if ((nslices == 0) || (nslices > MAX_SLICES)) - continue; - - /* Iterate through the slices */ - for (j = 1; j < nslices; j++) { - - rf_printf(1, "Examining slice %d\n", j); - slice = &slices->dss_slices[j - 1]; - dev = dkmodslice(disk->d_dev, j); - if (bdevvp(dev, &vp)) - panic("RAIDframe can't alloc vnode"); - - vref(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); - VOP_UNLOCK(vp, 0, td); - if (error) { - continue; - } - - error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)label, - FREAD, td->td_ucred, td); - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - vrele(vp); - if (error) - continue; - - rf_search_label(dev, label, &ac_list); - } - } - - FREE(label, M_RAIDFRAME); - FREE(slices, M_RAIDFRAME); -#endif - return (ac_list); -} - -static void -rf_search_label(dev_t dev, struct disklabel *label, RF_AutoConfig_t **ac_list) -{ - RF_AutoConfig_t *ac; - RF_ComponentLabel_t *clabel; - struct vnode *vp; - struct thread *td; - dev_t dev1; - int i, error, good_one; - - td = curthread; - - /* Iterate through the partitions */ - for (i=0; i < label->d_npartitions; i++) { - /* We only support partitions marked as RAID */ - if (label->d_partitions[i].p_fstype != FS_RAID) - continue; - -#if 0 /* GEOM */ - dev1 = dkmodpart(dev, i); -#else - dev1 = NULL; -#endif - if (dev1 == NULL) { - rf_printf(1, "dev1 == null\n"); - continue; - } - if (bdevvp(dev1, &vp)) - panic("RAIDframe can't alloc vnode"); - - vref(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); - VOP_UNLOCK(vp, 0, td); - if (error) { - /* Whatever... */ - continue; - } - - good_one = 0; - - clabel = (RF_ComponentLabel_t *) - malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, - M_NOWAIT); - if (clabel == NULL) { - /* XXX CLEANUP HERE */ - panic("RAID autoconfig: no memory!\n"); - } - - if (!raidread_component_label(dev1, vp, clabel)) { - /* Got the label. Is it reasonable? */ - if (rf_reasonable_label(clabel) && - (clabel->partitionSize <= - label->d_partitions[i].p_size)) { - rf_printf(1, "Component on: %s: %d\n", - dev1->si_name, label->d_partitions[i].p_size); - rf_print_component_label(clabel); - /* if it's reasonable, add it, else ignore it */ - ac = (RF_AutoConfig_t *) - malloc(sizeof(RF_AutoConfig_t), - M_RAIDFRAME, M_NOWAIT); - if (ac == NULL) { - /* XXX should panic? */ - panic("RAID autoconfig: no memory!\n"); - } - - sprintf(ac->devname, "%s", dev->si_name); - ac->dev = dev1; - ac->vp = vp; - ac->clabel = clabel; - ac->next = *ac_list; - *ac_list = ac; - good_one = 1; - } - } - if (!good_one) { - /* cleanup */ - free(clabel, M_RAIDFRAME); - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - vrele(vp); - } - } -} - -static int -rf_reasonable_label(clabel) - RF_ComponentLabel_t *clabel; -{ - - if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || - (clabel->version==RF_COMPONENT_LABEL_VERSION)) && - ((clabel->clean == RF_RAID_CLEAN) || - (clabel->clean == RF_RAID_DIRTY)) && - clabel->row >=0 && - clabel->column >= 0 && - clabel->num_rows > 0 && - clabel->num_columns > 0 && - clabel->row < clabel->num_rows && - clabel->column < clabel->num_columns && - clabel->blockSize > 0 && - clabel->numBlocks > 0) { - /* label looks reasonable enough... */ - return(1); - } - return(0); -} - - -void -rf_print_component_label(clabel) - RF_ComponentLabel_t *clabel; -{ - rf_printf(1, " Row: %d Column: %d Num Rows: %d Num Columns: %d\n", - clabel->row, clabel->column, - clabel->num_rows, clabel->num_columns); - rf_printf(1, " Version: %d Serial Number: %d Mod Counter: %d\n", - clabel->version, clabel->serial_number, - clabel->mod_counter); - rf_printf(1, " Clean: %s Status: %d\n", - clabel->clean ? "Yes" : "No", clabel->status ); - rf_printf(1, " sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", - clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); - rf_printf(1, " RAID Level: %c blocksize: %d numBlocks: %d\n", - (char) clabel->parityConfig, clabel->blockSize, - clabel->numBlocks); - rf_printf(1, " Autoconfig: %s\n", clabel->autoconfigure ? "Yes":"No"); - rf_printf(1, " Contains root partition: %s\n", - clabel->root_partition ? "Yes" : "No" ); - rf_printf(1, " Last configured as: raid%d\n", clabel->last_unit ); -#if 0 - rf_printf(1, " Config order: %d\n", clabel->config_order); -#endif - -} - -RF_ConfigSet_t * -rf_create_auto_sets(ac_list) - RF_AutoConfig_t *ac_list; -{ - RF_AutoConfig_t *ac; - RF_ConfigSet_t *config_sets; - RF_ConfigSet_t *cset; - RF_AutoConfig_t *ac_next; - - - config_sets = NULL; - - /* Go through the AutoConfig list, and figure out which components - belong to what sets. */ - ac = ac_list; - while(ac!=NULL) { - /* we're going to putz with ac->next, so save it here - for use at the end of the loop */ - ac_next = ac->next; - - if (config_sets == NULL) { - /* will need at least this one... */ - config_sets = (RF_ConfigSet_t *) - malloc(sizeof(RF_ConfigSet_t), - M_RAIDFRAME, M_NOWAIT); - if (config_sets == NULL) { - panic("rf_create_auto_sets: No memory!\n"); - } - /* this one is easy :) */ - config_sets->ac = ac; - config_sets->next = NULL; - config_sets->rootable = 0; - ac->next = NULL; - } else { - /* which set does this component fit into? */ - cset = config_sets; - while(cset!=NULL) { - if (rf_does_it_fit(cset, ac)) { - /* looks like it matches... */ - ac->next = cset->ac; - cset->ac = ac; - break; - } - cset = cset->next; - } - if (cset==NULL) { - /* didn't find a match above... new set..*/ - cset = (RF_ConfigSet_t *) - malloc(sizeof(RF_ConfigSet_t), - M_RAIDFRAME, M_NOWAIT); - if (cset == NULL) { - panic("rf_create_auto_sets: No memory!\n"); - } - cset->ac = ac; - ac->next = NULL; - cset->next = config_sets; - cset->rootable = 0; - config_sets = cset; - } - } - ac = ac_next; - } - - - return(config_sets); -} - -static int -rf_does_it_fit(cset, ac) - RF_ConfigSet_t *cset; - RF_AutoConfig_t *ac; -{ - RF_ComponentLabel_t *clabel1, *clabel2; - - /* If this one matches the *first* one in the set, that's good - enough, since the other members of the set would have been - through here too... */ - /* note that we are not checking partitionSize here.. - - Note that we are also not checking the mod_counters here. - If everything else matches execpt the mod_counter, that's - good enough for this test. We will deal with the mod_counters - a little later in the autoconfiguration process. - - (clabel1->mod_counter == clabel2->mod_counter) && - - The reason we don't check for this is that failed disks - will have lower modification counts. If those disks are - not added to the set they used to belong to, then they will - form their own set, which may result in 2 different sets, - for example, competing to be configured at raid0, and - perhaps competing to be the root filesystem set. If the - wrong ones get configured, or both attempt to become /, - weird behaviour and or serious lossage will occur. Thus we - need to bring them into the fold here, and kick them out at - a later point. - - */ - - clabel1 = cset->ac->clabel; - clabel2 = ac->clabel; - if ((clabel1->version == clabel2->version) && - (clabel1->serial_number == clabel2->serial_number) && - (clabel1->num_rows == clabel2->num_rows) && - (clabel1->num_columns == clabel2->num_columns) && - (clabel1->sectPerSU == clabel2->sectPerSU) && - (clabel1->SUsPerPU == clabel2->SUsPerPU) && - (clabel1->SUsPerRU == clabel2->SUsPerRU) && - (clabel1->parityConfig == clabel2->parityConfig) && - (clabel1->maxOutstanding == clabel2->maxOutstanding) && - (clabel1->blockSize == clabel2->blockSize) && - (clabel1->numBlocks == clabel2->numBlocks) && - (clabel1->autoconfigure == clabel2->autoconfigure) && - (clabel1->root_partition == clabel2->root_partition) && - (clabel1->last_unit == clabel2->last_unit) && - (clabel1->config_order == clabel2->config_order)) { - /* if it get's here, it almost *has* to be a match */ - } else { - /* it's not consistent with somebody in the set.. - punt */ - return(0); - } - /* all was fine.. it must fit... */ - return(1); -} - -int -rf_have_enough_components(cset) - RF_ConfigSet_t *cset; -{ - RF_AutoConfig_t *ac; - RF_AutoConfig_t *auto_config; - RF_ComponentLabel_t *clabel; - int r,c; - int num_rows; - int num_cols; - int num_missing; - int mod_counter; - int mod_counter_found; - int even_pair_failed; - char parity_type; - - - /* check to see that we have enough 'live' components - of this set. If so, we can configure it if necessary */ - - num_rows = cset->ac->clabel->num_rows; - num_cols = cset->ac->clabel->num_columns; - parity_type = cset->ac->clabel->parityConfig; - - /* XXX Check for duplicate components!?!?!? */ - - /* Determine what the mod_counter is supposed to be for this set. */ - - mod_counter_found = 0; - mod_counter = 0; - ac = cset->ac; - while(ac!=NULL) { - if (mod_counter_found==0) { - mod_counter = ac->clabel->mod_counter; - mod_counter_found = 1; - } else { - if (ac->clabel->mod_counter > mod_counter) { - mod_counter = ac->clabel->mod_counter; - } - } - ac = ac->next; - } - - num_missing = 0; - auto_config = cset->ac; - - for(r=0; r<num_rows; r++) { - even_pair_failed = 0; - for(c=0; c<num_cols; c++) { - ac = auto_config; - while(ac!=NULL) { - if ((ac->clabel->row == r) && - (ac->clabel->column == c) && - (ac->clabel->mod_counter == mod_counter)) { - /* it's this one... */ - rf_printf(1, "Found: %s at %d,%d\n", - ac->devname,r,c); - break; - } - ac=ac->next; - } - if (ac==NULL) { - /* Didn't find one here! */ - /* special case for RAID 1, especially - where there are more than 2 - components (where RAIDframe treats - things a little differently :( ) */ - if (parity_type == '1') { - if (c%2 == 0) { /* even component */ - even_pair_failed = 1; - } else { /* odd component. If - we're failed, and - so is the even - component, it's - "Good Night, Charlie" */ - if (even_pair_failed == 1) { - return(0); - } - } - } else { - /* normal accounting */ - num_missing++; - } - } - if ((parity_type == '1') && (c%2 == 1)) { - /* Just did an even component, and we didn't - bail.. reset the even_pair_failed flag, - and go on to the next component.... */ - even_pair_failed = 0; - } - } - } - - clabel = cset->ac->clabel; - - if (((clabel->parityConfig == '0') && (num_missing > 0)) || - ((clabel->parityConfig == '4') && (num_missing > 1)) || - ((clabel->parityConfig == '5') && (num_missing > 1))) { - /* XXX this needs to be made *much* more general */ - /* Too many failures */ - return(0); - } - /* otherwise, all is well, and we've got enough to take a kick - at autoconfiguring this set */ - return(1); -} - -void -rf_create_configuration(ac,config,raidPtr) - RF_AutoConfig_t *ac; - RF_Config_t *config; - RF_Raid_t *raidPtr; -{ - RF_ComponentLabel_t *clabel; - int i; - - clabel = ac->clabel; - - /* 1. Fill in the common stuff */ - config->numRow = clabel->num_rows; - config->numCol = clabel->num_columns; - config->numSpare = 0; /* XXX should this be set here? */ - config->sectPerSU = clabel->sectPerSU; - config->SUsPerPU = clabel->SUsPerPU; - config->SUsPerRU = clabel->SUsPerRU; - config->parityConfig = clabel->parityConfig; - /* XXX... */ - strcpy(config->diskQueueType,"fifo"); - config->maxOutstandingDiskReqs = clabel->maxOutstanding; - config->layoutSpecificSize = 0; /* XXX ? */ - - while(ac!=NULL) { - /* row/col values will be in range due to the checks - in reasonable_label() */ - strcpy(config->devnames[ac->clabel->row][ac->clabel->column], - ac->devname); - ac = ac->next; - } - - for(i=0;i<RF_MAXDBGV;i++) { - config->debugVars[i][0] = '\0'; - } -} - -int -rf_set_autoconfig(raidPtr, new_value) - RF_Raid_t *raidPtr; - int new_value; -{ - RF_ComponentLabel_t *clabel; - struct vnode *vp; - dev_t dev; - int row, column; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_WAITOK | M_ZERO); - - raidPtr->autoconfigure = new_value; - for(row=0; row<raidPtr->numRow; row++) { - for(column=0; column<raidPtr->numCol; column++) { - if (raidPtr->Disks[row][column].status == - rf_ds_optimal) { - dev = raidPtr->Disks[row][column].dev; - vp = raidPtr->raid_cinfo[row][column].ci_vp; - raidread_component_label(dev, vp, clabel); - clabel->autoconfigure = new_value; - raidwrite_component_label(dev, vp, clabel); - } - } - } - FREE(clabel, M_RAIDFRAME); - return(new_value); -} - -int -rf_set_rootpartition(raidPtr, new_value) - RF_Raid_t *raidPtr; - int new_value; -{ - RF_ComponentLabel_t *clabel; - struct vnode *vp; - dev_t dev; - int row, column; - - MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_WAITOK | M_ZERO); - - raidPtr->root_partition = new_value; - for(row=0; row<raidPtr->numRow; row++) { - for(column=0; column<raidPtr->numCol; column++) { - if (raidPtr->Disks[row][column].status == - rf_ds_optimal) { - dev = raidPtr->Disks[row][column].dev; - vp = raidPtr->raid_cinfo[row][column].ci_vp; - raidread_component_label(dev, vp, clabel); - clabel->root_partition = new_value; - raidwrite_component_label(dev, vp, clabel); - } - } - } - FREE(clabel, M_RAIDFRAME); - return(new_value); -} - -void -rf_release_all_vps(cset) - RF_ConfigSet_t *cset; -{ - RF_AutoConfig_t *ac; - struct thread *td; - - td = curthread; - ac = cset->ac; - while(ac!=NULL) { - /* Close the vp, and give it back */ - if (ac->vp) { - VOP_CLOSE(ac->vp, FREAD, td->td_ucred, td); - vrele(ac->vp); - ac->vp = NULL; - } - ac = ac->next; - } -} - - -void -rf_cleanup_config_set(cset) - RF_ConfigSet_t *cset; -{ - RF_AutoConfig_t *ac; - RF_AutoConfig_t *next_ac; - - ac = cset->ac; - while(ac!=NULL) { - next_ac = ac->next; - /* nuke the label */ - free(ac->clabel, M_RAIDFRAME); - /* cleanup the config structure */ - free(ac, M_RAIDFRAME); - /* "next.." */ - ac = next_ac; - } - /* and, finally, nuke the config set */ - free(cset, M_RAIDFRAME); -} - - -void -raid_init_component_label(raidPtr, clabel) - RF_Raid_t *raidPtr; - RF_ComponentLabel_t *clabel; -{ - /* current version number */ - clabel->version = RF_COMPONENT_LABEL_VERSION; - clabel->serial_number = raidPtr->serial_number; - clabel->mod_counter = raidPtr->mod_counter; - clabel->num_rows = raidPtr->numRow; - clabel->num_columns = raidPtr->numCol; - clabel->clean = RF_RAID_DIRTY; /* not clean */ - clabel->status = rf_ds_optimal; /* "It's good!" */ - - clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; - clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; - - clabel->blockSize = raidPtr->bytesPerSector; - clabel->numBlocks = raidPtr->sectorsPerDisk; - - /* XXX not portable */ - clabel->parityConfig = raidPtr->Layout.map->parityConfig; - clabel->maxOutstanding = raidPtr->maxOutstanding; - clabel->autoconfigure = raidPtr->autoconfigure; - clabel->root_partition = raidPtr->root_partition; - clabel->last_unit = raidPtr->raidid; - clabel->config_order = raidPtr->config_order; -} - -int -rf_auto_config_set(cset, unit, parent_sc) - RF_ConfigSet_t *cset; - int *unit; - struct raidctl_softc *parent_sc; -{ - int retcode = 0; - RF_Raid_t *raidPtr; - RF_Config_t *config; - int raidID; - - rf_printf(0, "RAIDframe autoconfigure\n"); - - *unit = -1; - - /* 1. Create a config structure */ - - config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME, - M_NOWAIT|M_ZERO); - if (config==NULL) { - rf_printf(0, "Out of mem at rf_auto_config_set\n"); - /* XXX do something more intelligent here. */ - return(1); - } - - /* XXX raidID needs to be set correctly.. */ - - /* - 2. Figure out what RAID ID this one is supposed to live at - See if we can get the same RAID dev that it was configured - on last time.. - */ - - raidID = cset->ac->clabel->last_unit; - if (raidID < 0) { - /* let's not wander off into lala land. */ - raidID = raidgetunit(parent_sc, 0); - } else { - raidID = raidgetunit(parent_sc, raidID); - } - - if (raidID < 0) { - /* punt... */ - rf_printf(0, "Unable to auto configure this set!\n"); - rf_printf(1, "Out of RAID devs!\n"); - return(1); - } - rf_printf(0, "Configuring raid%d:\n",raidID); - RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *)); - if (raidPtr == NULL) { - rf_printf(0, "Out of mem at rf_auto_config_set\n"); - return (1); - } - bzero((char *)raidPtr, sizeof(RF_Raid_t)); - - /* XXX all this stuff should be done SOMEWHERE ELSE! */ - raidPtr->raidid = raidID; - raidPtr->openings = RAIDOUTSTANDING; - - /* 3. Build the configuration structure */ - rf_create_configuration(cset->ac, config, raidPtr); - - /* 4. Do the configuration */ - retcode = rf_Configure(raidPtr, config, cset->ac); - - if (retcode == 0) { - - parent_sc->sc_raiddevs[raidID] = raidinit(raidPtr); - if (parent_sc->sc_raiddevs[raidID] == NULL) { - rf_printf(0, "Could not create RAID device\n"); - RF_Free(raidPtr, sizeof(RF_Raid_t)); - free(config, M_RAIDFRAME); - return (1); - } - - parent_sc->sc_numraid++; - ((struct raid_softc *)raidPtr->sc)->sc_parent_dev = - parent_sc->sc_dev; - rf_markalldirty(raidPtr); - raidPtr->autoconfigure = 1; /* XXX do this here? */ - if (cset->ac->clabel->root_partition==1) { - /* everything configured just fine. Make a note - that this set is eligible to be root. */ - cset->rootable = 1; - /* XXX do this here? */ - raidPtr->root_partition = 1; - } - } - - /* 5. Cleanup */ - free(config, M_RAIDFRAME); - - *unit = raidID; - return(retcode); -} - -void -rf_disk_unbusy(desc) - RF_RaidAccessDesc_t *desc; -{ - struct raid_softc *sc; - struct bio *bp; - - sc = desc->raidPtr->sc; - bp = (struct bio *)desc->bp; -} - -/* - * Get the next available unit number from the bitmap. You can also request - * a particular unit number by passing it in the second arg. If it's not - * available, then grab the next free one. Return -1 if none are available. - */ -static int -raidgetunit(struct raidctl_softc *parent_sc, int id) -{ - int i; - - if (id >= RF_MAX_ARRAYS) - return (-1); - - for (i = id; i < RF_MAX_ARRAYS; i++) { - if (parent_sc->sc_raiddevs[i] == NULL) - return (i); - } - - if (id != 0) { - for (i = 0; i < id; i++) { - if (parent_sc->sc_raiddevs[i] == NULL) - return (i); - } - } - - return (-1); -} - -static int -raidshutdown(void) -{ - struct raidctl_softc *parent_sc; - int i, error = 0; - - parent_sc = raidctl_dev->si_drv1; - - if (parent_sc->sc_numraid != 0) { -#if XXX_KTHREAD_EXIT_RACE - return (EBUSY); -#else - for (i = 0; i < RF_MAX_ARRAYS; i++) { - if (parent_sc->sc_raiddevs[i] != NULL) { - rf_printf(0, "Shutting down raid%d\n", i); - error = raidctlioctl(raidctl_dev, - RAIDFRAME_SHUTDOWN, (caddr_t)&i, 0, NULL); - if (error) - return (error); - if (parent_sc->sc_numraid == 0) - break; - } - } -#endif - } - - destroy_dev(raidctl_dev); - - return (error); -} - -int -raid_getcomponentsize(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col) -{ - struct vnode *vp; - struct vattr va; - RF_Thread_t td; - off_t mediasize; - u_int secsize; - int retcode; - - td = raidPtr->engine_thread; - - retcode = raidlookup(raidPtr->Disks[row][col].devname, td, &vp); - - if (retcode) { - printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n",raidPtr->raidid, - raidPtr->Disks[row][col].devname, retcode); - - /* XXX the component isn't responding properly... - must be still dead :-( */ - raidPtr->reconInProgress--; - return(retcode); - - } else { - - /* Ok, so we can at least do a lookup... - How about actually getting a vp for it? */ - - if ((retcode = VOP_GETATTR(vp, &va, rf_getucred(td), - td)) != 0) { - raidPtr->reconInProgress--; - return(retcode); - } - - retcode = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)&secsize, - FREAD, rf_getucred(td), td); - if (retcode) - return (retcode); - raidPtr->Disks[row][col].blockSize = secsize; - - retcode = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, - FREAD, rf_getucred(td), td); - if (retcode) - return (retcode); - raidPtr->Disks[row][col].numBlocks = mediasize / secsize; - - raidPtr->raid_cinfo[row][col].ci_vp = vp; - raidPtr->raid_cinfo[row][col].ci_dev = udev2dev(va.va_rdev); - raidPtr->Disks[row][col].dev = udev2dev(va.va_rdev); - - /* we allow the user to specify that only a - fraction of the disks should be used this is - just for debug: it speeds up - * the parity scan */ - raidPtr->Disks[row][col].numBlocks = - raidPtr->Disks[row][col].numBlocks * - rf_sizePercentage / 100; - } - - return(retcode); -} - -static int -raid_modevent(mod, type, data) - module_t mod; - int type; - void *data; -{ - int error = 0; - - switch (type) { - case MOD_LOAD: - raidattach(); - break; - - case MOD_UNLOAD: - case MOD_SHUTDOWN: - error = raidshutdown(); - break; - - default: - break; - } - - return (error); -} - -moduledata_t raid_mod = { - "raidframe", - (modeventhand_t) raid_modevent, - 0}; - -DECLARE_MODULE(raidframe, raid_mod, SI_SUB_RAID, SI_ORDER_MIDDLE); diff --git a/sys/dev/raidframe/rf_freelist.h b/sys/dev/raidframe/rf_freelist.h deleted file mode 100644 index 13a5e83..0000000 --- a/sys/dev/raidframe/rf_freelist.h +++ /dev/null @@ -1,702 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_freelist.h,v 1.6 2002/08/08 02:53:01 oster Exp $ */ -/* - * rf_freelist.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_freelist.h -- code to manage counted freelists - * - * Keep an arena of fixed-size objects. When a new object is needed, - * allocate it as necessary. When an object is freed, either put it - * in the arena, or really free it, depending on the maximum arena - * size. - */ - -#ifndef _RF__RF_FREELIST_H_ -#define _RF__RF_FREELIST_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_threadstuff.h> - -#define RF_FREELIST_STATS 0 - -#if RF_FREELIST_STATS > 0 -typedef struct RF_FreeListStats_s { - char *file; - int line; - int allocations; - int frees; - int max_free; - int grows; - int outstanding; - int max_outstanding; -} RF_FreeListStats_t; -#define RF_FREELIST_STAT_INIT(_fl_) { \ - bzero((char *)&((_fl_)->stats), sizeof(RF_FreeListStats_t)); \ - (_fl_)->stats.file = __FILE__; \ - (_fl_)->stats.line = __LINE__; \ -} - -#define RF_FREELIST_STAT_ALLOC(_fl_) { \ - (_fl_)->stats.allocations++; \ - (_fl_)->stats.outstanding++; \ - if ((_fl_)->stats.outstanding > (_fl_)->stats.max_outstanding) \ - (_fl_)->stats.max_outstanding = (_fl_)->stats.outstanding; \ -} - -#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) { \ - if ((_fl_)->free_cnt > (_fl_)->stats.max_free) \ - (_fl_)->stats.max_free = (_fl_)->free_cnt; \ -} - -#define RF_FREELIST_STAT_FREE(_fl_) { \ - (_fl_)->stats.frees++; \ - (_fl_)->stats.outstanding--; \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ -} - -#define RF_FREELIST_STAT_GROW(_fl_) { \ - (_fl_)->stats.grows++; \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ -} - -#define RF_FREELIST_STAT_REPORT(_fl_) { \ - printf("Freelist at %s %d (%s)\n", (_fl_)->stats.file, (_fl_)->stats.line, RF_STRING(_fl_)); \ - printf(" %d allocations, %d frees\n", (_fl_)->stats.allocations, (_fl_)->stats.frees); \ - printf(" %d grows\n", (_fl_)->stats.grows); \ - printf(" %d outstanding\n", (_fl_)->stats.outstanding); \ - printf(" %d free (max)\n", (_fl_)->stats.max_free); \ - printf(" %d outstanding (max)\n", (_fl_)->stats.max_outstanding); \ -} - -#else /* RF_FREELIST_STATS > 0 */ - -#define RF_FREELIST_STAT_INIT(_fl_) -#define RF_FREELIST_STAT_ALLOC(_fl_) -#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) -#define RF_FREELIST_STAT_FREE(_fl_) -#define RF_FREELIST_STAT_GROW(_fl_) -#define RF_FREELIST_STAT_REPORT(_fl_) - -#endif /* RF_FREELIST_STATS > 0 */ - -struct RF_FreeList_s { - void *objlist; /* list of free obj */ - int free_cnt; /* how many free obj */ - int max_free_cnt; /* max free arena size */ - int obj_inc; /* how many to allocate at a time */ - int obj_size; /* size of objects */ - RF_DECLARE_MUTEX(lock) -#if RF_FREELIST_STATS > 0 - RF_FreeListStats_t stats; /* statistics */ -#endif /* RF_FREELIST_STATS > 0 */ -}; -/* - * fl = freelist - * maxcnt = max number of items in arena - * inc = how many to allocate at a time - * size = size of object - */ -#define RF_FREELIST_CREATE(_fl_,_maxcnt_,_inc_,_size_) { \ - int rc; \ - RF_ASSERT((_inc_) > 0); \ - RF_Malloc(_fl_, sizeof(RF_FreeList_t), (RF_FreeList_t *)); \ - (_fl_)->objlist = NULL; \ - (_fl_)->free_cnt = 0; \ - (_fl_)->max_free_cnt = _maxcnt_; \ - (_fl_)->obj_inc = _inc_; \ - (_fl_)->obj_size = _size_; \ - rc = rf_mutex_init(&(_fl_)->lock, "RF_FREELIST"); \ - if (rc) { \ - RF_Free(_fl_, sizeof(RF_FreeList_t)); \ - _fl_ = NULL; \ - } \ - RF_FREELIST_STAT_INIT(_fl_); \ -} - -/* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast - */ -#define RF_FREELIST_PRIME(_fl_,_cnt_,_nextp_,_cast_) { \ - void *_p; \ - int _i; \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -#define RF_FREELIST_MUTEX_OF(_fl_) ((_fl_)->lock) - -#define RF_FREELIST_DO_UNLOCK(_fl_) { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -#define RF_FREELIST_DO_LOCK(_fl_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast - * init = func to call to init obj - */ -#define RF_FREELIST_PRIME_INIT(_fl_,_cnt_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_init_ (_cast_ _p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - } \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast - * init = func to call to init obj - * arg = arg to init obj func - */ -#define RF_FREELIST_PRIME_INIT_ARG(_fl_,_cnt_,_nextp_,_cast_,_init_,_arg_) { \ - void *_p; \ - int _i; \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_init_ (_cast_ _p,_arg_)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - } \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func - */ -#define RF_FREELIST_GET_INIT(_fl_,_obj_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func - * arg = arg to init obj func - */ -#define RF_FREELIST_GET_INIT_ARG(_fl_,_obj_,_nextp_,_cast_,_init_,_arg_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_,_arg_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p,_arg_)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func - */ -#define RF_FREELIST_GET_INIT_NOUNLOCK(_fl_,_obj_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - } \ - else { \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_FREELIST_STAT_GROW(_fl_); \ - } \ - RF_FREELIST_STAT_ALLOC(_fl_); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - */ -#define RF_FREELIST_GET(_fl_,_obj_,_nextp_,_cast_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * num = num objs to return - */ -#define RF_FREELIST_GET_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \ - void *_p, *_l, *_f; \ - int _i, _n; \ - _l = _f = NULL; \ - _n = 0; \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - for(_n=0;_n<_num_;_n++) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - else { \ - break; \ - } \ - } \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_FREELIST_STAT_GROW(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if (_f == NULL) \ - _f = _obj_; \ - if (_obj_) { \ - (_cast_(_obj_))->_nextp_ = _l; \ - _l = _obj_; \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - } \ - else { \ - (_cast_(_f))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _l; \ - _n = _num_; \ - } \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ - } \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - */ -#define RF_FREELIST_FREE(_fl_,_obj_,_nextp_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * num = num to free (debugging) - */ -#define RF_FREELIST_FREE_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \ - void *_no; \ - int _n; \ - _n = 0; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - while(_obj_) { \ - _no = (_cast_(_obj_))->_nextp_; \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - _n++; \ - _obj_ = _no; \ - RF_FREELIST_STAT_FREE(_fl_); \ - } \ - RF_ASSERT(_n==(_num_)); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init - */ -#define RF_FREELIST_FREE_CLEAN(_fl_,_obj_,_nextp_,_clean_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init - * arg = arg for undo func - */ -#define RF_FREELIST_FREE_CLEAN_ARG(_fl_,_obj_,_nextp_,_clean_,_arg_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_,_arg_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -/* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init - */ -#define RF_FREELIST_FREE_CLEAN_NOUNLOCK(_fl_,_obj_,_nextp_,_clean_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ -} - -/* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - */ -#define RF_FREELIST_DESTROY(_fl_,_nextp_,_cast_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} - -/* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - * clean = func to undo obj init - */ -#define RF_FREELIST_DESTROY_CLEAN(_fl_,_nextp_,_cast_,_clean_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - _clean_ (_cur); \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} - -/* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - * clean = func to undo obj init - * arg = arg for undo func - */ -#define RF_FREELIST_DESTROY_CLEAN_ARG(_fl_,_nextp_,_cast_,_clean_,_arg_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - _clean_ (_cur,_arg_); \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} - -#endif /* !_RF__RF_FREELIST_H_ */ diff --git a/sys/dev/raidframe/rf_general.h b/sys/dev/raidframe/rf_general.h deleted file mode 100644 index e709899..0000000 --- a/sys/dev/raidframe/rf_general.h +++ /dev/null @@ -1,107 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_general.h,v 1.6 2000/12/15 02:12:58 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_general.h -- some general-use definitions - */ - -/*#define NOASSERT*/ - -#ifndef _RF__RF_GENERAL_H_ -#define _RF__RF_GENERAL_H_ - -/* error reporting and handling */ - -#ifdef _KERNEL -#include<sys/systm.h> /* printf, sprintf, and friends */ -#endif - -#define RF_ERRORMSG(s) printf((s)) -#define RF_ERRORMSG1(s,a) printf((s),(a)) -#define RF_ERRORMSG2(s,a,b) printf((s),(a),(b)) -#define RF_ERRORMSG3(s,a,b,c) printf((s),(a),(b),(c)) - -void rf_print_panic_message(int, char *); -void rf_print_assert_panic_message(int, char *, char *); - -extern char rf_panicbuf[]; -#define RF_PANIC() {rf_print_panic_message(__LINE__,__FILE__); panic(rf_panicbuf);} - -#ifdef _KERNEL -#ifdef RF_ASSERT -#undef RF_ASSERT -#endif /* RF_ASSERT */ -#ifndef NOASSERT -#define RF_ASSERT(_x_) { \ - if (!(_x_)) { \ - rf_print_assert_panic_message(__LINE__, __FILE__, #_x_); \ - panic(rf_panicbuf); \ - } \ -} -#else /* !NOASSERT */ -#define RF_ASSERT(x) {/*noop*/} -#endif /* !NOASSERT */ -#else /* _KERNEL */ -#define RF_ASSERT(x) {/*noop*/} -#endif /* _KERNEL */ - -/* random stuff */ -#define RF_MAX(a,b) (((a) > (b)) ? (a) : (b)) -#define RF_MIN(a,b) (((a) < (b)) ? (a) : (b)) - -/* divide-by-zero check */ -#define RF_DB0_CHECK(a,b) ( ((b)==0) ? 0 : (a)/(b) ) - -/* get time of day */ -#define RF_GETTIME(_t) microtime(&(_t)) - -/* - * zero memory- not all bzero calls go through here, only - * those which in the kernel may have a user address - */ - -#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely - * incorrect. GO */ - -#if defined(__FreeBSD__) -#define NBPG PAGE_SIZE -#endif - -#define RF_UL(x) ((unsigned long) (x)) -#define RF_PGMASK RF_UL(NBPG-1) -#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* bytes left in page */ -#define RF_PAGE_ALIGNED(x) ((RF_UL(x) & RF_PGMASK) == 0) - -#ifdef __STDC__ -#define RF_STRING(_str_) #_str_ -#else /* __STDC__ */ -#define RF_STRING(_str_) "_str_" -#endif /* __STDC__ */ - -#endif /* !_RF__RF_GENERAL_H_ */ diff --git a/sys/dev/raidframe/rf_geniq.c b/sys/dev/raidframe/rf_geniq.c deleted file mode 100644 index c21cb1b..0000000 --- a/sys/dev/raidframe/rf_geniq.c +++ /dev/null @@ -1,165 +0,0 @@ -/* $NetBSD: rf_geniq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_geniq.c - * code which implements Reed-Solomon encoding for RAID level 6 - */ - - -#define RF_UTILITY 1 -#include <dev/raidframe/rf_pqdeg.h> - -/* - five bit lfsr - poly - feedback connections - - val = value; -*/ -int -lsfr_shift(val, poly) - unsigned val, poly; -{ - unsigned new; - unsigned int i; - unsigned high = (val >> 4) & 1; - unsigned bit; - - new = (poly & 1) ? high : 0; - - for (i = 1; i <= 4; i++) { - bit = (val >> (i - 1)) & 1; - if (poly & (1 << i)) /* there is a feedback connection */ - new = new | ((bit ^ high) << i); - else - new = new | (bit << i); - } - return new; -} -/* generate Q matricies for the data */ - -RF_ua32_t rf_qfor[32]; - -void -main() -{ - unsigned int i, j, l, a, b; - unsigned int val; - unsigned int r; - unsigned int m, p, q; - - RF_ua32_t k; - - printf("/*\n"); - printf(" * rf_invertq.h\n"); - printf(" */\n"); - printf("/*\n"); - printf(" * GENERATED FILE -- DO NOT EDIT\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef _RF__RF_INVERTQ_H_\n"); - printf("#define _RF__RF_INVERTQ_H_\n"); - printf("\n"); - printf("/*\n"); - printf(" * rf_geniq.c must include rf_archs.h before including\n"); - printf(" * this file (to get VPATH magic right with the way we\n"); - printf(" * generate this file in kernel trees)\n"); - printf(" */\n"); - printf("/* #include \"rf_archs.h\" */\n"); - printf("\n"); - printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n"); - printf("\n"); - printf("#define RF_Q_COLS 32\n"); - printf("RF_ua32_t rf_rn = {\n"); - k[0] = 1; - for (j = 0; j < 31; j++) - k[j + 1] = lsfr_shift(k[j], 5); - for (j = 0; j < 32; j++) - printf("%d, ", k[j]); - printf("};\n"); - - printf("RF_ua32_t rf_qfor[32] = {\n"); - for (i = 0; i < 32; i++) { - printf("/* i = %d */ { 0, ", i); - rf_qfor[i][0] = 0; - for (j = 1; j < 32; j++) { - val = j; - for (l = 0; l < i; l++) - val = lsfr_shift(val, 5); - rf_qfor[i][j] = val; - printf("%d, ", val); - } - printf("},\n"); - } - printf("};\n"); - printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n"); - - /* generate the inverse tables. (i,j,p,q) */ - /* The table just stores a. Get b back from the parity */ - printf("#ifdef KERNEL\n"); - printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n"); - printf("#elif defined(NO_PQ)\n"); - printf("RF_ua1024_t rf_qinv[29*29];\n"); - printf("#else /* !KERNEL && NO_PQ */\n"); - printf("RF_ua1024_t rf_qinv[29*29] = {\n"); - for (i = 0; i < 29; i++) { - for (j = 0; j < 29; j++) { - printf("/* i %d, j %d */{ ", i, j); - if (i == j) - for (l = 0; l < 1023; l++) - printf("0, "); - else { - for (p = 0; p < 32; p++) - for (q = 0; q < 32; q++) { - /* What are a, b such that a ^ - * b = p; and qfor[(28-i)][a - * ^ rf_rn[i+1]] ^ - * qfor[(28-j)][b ^ - * rf_rn[j+1]] = q. Solve by - * guessing a. Then testing. */ - for (a = 0; a < 32; a++) { - b = a ^ p; - if ((rf_qfor[28 - i][a ^ k[i + 1]] ^ rf_qfor[28 - j][b ^ k[j + 1]]) == q) - break; - } - if (a == 32) - printf("unable to solve %d %d %d %d\n", i, j, p, q); - printf("%d,", a); - } - } - printf("},\n"); - } - } - printf("};\n"); - printf("\n#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n\n"); - printf("#endif /* !KERNEL && NO_PQ */\n"); - printf("#endif /* !_RF__RF_INVERTQ_H_ */\n"); - exit(0); -} diff --git a/sys/dev/raidframe/rf_hist.h b/sys/dev/raidframe/rf_hist.h deleted file mode 100644 index b8b12c3..0000000 --- a/sys/dev/raidframe/rf_hist.h +++ /dev/null @@ -1,57 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_hist.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * rf_hist.h - * - * Histgram operations for RAIDframe stats - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_HIST_H_ -#define _RF__RF_HIST_H_ - -#include <dev/raidframe/rf_types.h> - -#define RF_HIST_RESOLUTION 5 -#define RF_HIST_MIN_VAL 0 -#define RF_HIST_MAX_VAL 1000 -#define RF_HIST_RANGE (RF_HIST_MAX_VAL - RF_HIST_MIN_VAL) -#define RF_HIST_NUM_BUCKETS (RF_HIST_RANGE / RF_HIST_RESOLUTION + 1) - -typedef RF_uint32 RF_Hist_t; - -#define RF_HIST_ADD(_hist_,_val_) { \ - RF_Hist_t val; \ - val = ((RF_Hist_t)(_val_)) / 1000; \ - if (val >= RF_HIST_MAX_VAL) \ - _hist_[RF_HIST_NUM_BUCKETS-1]++; \ - else \ - _hist_[(val - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \ -} - -#endif /* !_RF__RF_HIST_H_ */ diff --git a/sys/dev/raidframe/rf_interdecluster.c b/sys/dev/raidframe/rf_interdecluster.c deleted file mode 100644 index 8b1dbdb..0000000 --- a/sys/dev/raidframe/rf_interdecluster.c +++ /dev/null @@ -1,285 +0,0 @@ -/* $NetBSD: rf_interdecluster.c,v 1.5 2001/01/26 05:09:13 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************ - * - * rf_interdecluster.c -- implements interleaved declustering - * - ************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_INTERDECLUSTER > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_interdecluster.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegwr.h> - -typedef struct RF_InterdeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_InterdeclusterConfigInfo_t; - -int -rf_ConfigureInterDecluster( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t num_used_stripeUnitsPerDisk; - RF_InterdeclusterConfigInfo_t *info; - RF_RowCol_t i, tmp, SUs_per_region; - - /* create an Interleaved Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *), - raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* fill in the config structure. */ - SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1); - info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < SUs_per_region; i++) { - info->stripeIdentifier[i][0] = i / (raidPtr->numCol - 1); - tmp = i / raidPtr->numCol; - info->stripeIdentifier[i][1] = (i + 1 + tmp) % raidPtr->numCol; - } - - /* no spare tables */ - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - - /* total number of stripes should a multiple of 2*numCol: Each sparing - * region consists of 2*numCol stripes: n-1 primary copy, n-1 - * secondary copy and 2 for spare .. */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2 * raidPtr->numCol)); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol); - /* this is in fact the number of stripe units (that are primary data - * copies) in the sparing region */ - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol + 1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - - layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; - - raidPtr->sectorsPerDisk = - num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = - (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr) -{ - return (30); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr) -{ - return (raidPtr->sectorsPerDisk); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsInterDecluster( - RF_Raid_t * raidPtr) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - return (2 * ((RF_ReconUnitCount_t) info->numSparingRegions)); - /* the layout uses two stripe units per disk as spare within each - * sparing region */ -} -/* Maps to the primary copy of the data, i.e. the first mirror pair */ -void -rf_MapSectorInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk; - RF_StripeNum_t sparing_region_id, index_within_region; - int col_before_remap; - - *row = 0; - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - su_offset_into_disk = index_within_region % (raidPtr->numCol - 1); - mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; - col_before_remap = index_within_region / (raidPtr->numCol - 1); - - if (!remap) { - *col = col_before_remap;; - *diskSector = (su_offset_into_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap sector to spare space... */ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - *col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; - if (*col == col_before_remap) - *col = (*col + 1) % raidPtr->numCol; - } -} -/* Maps to the second copy of the mirror pair. */ -void -rf_MapParityInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - int col_before_remap; - - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; - col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; - - *row = 0; - if (!remap) { - *col = col_before_remap; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += sparing_region_id * (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } else { - /* remap parity to spare space ... */ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - *col = index_within_region / (raidPtr->numCol - 1); - *col = (*col + 1) % raidPtr->numCol; - if (*col == col_before_remap) - *col = (*col + 1) % raidPtr->numCol; - } -} - -void -rf_IdentifyStripeInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID; - - SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; - SUID = SUID % info->stripeUnitsPerSparingRegion; - - *outRow = 0; - *diskids = info->stripeIdentifier[SUID]; -} - -void -rf_MapSIDToPSIDInterDecluster( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -/****************************************************************************** - * select a graph to perform a single-stripe access - * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - name of function to use to create the graph - *****************************************************************************/ - -void -rf_RAIDIDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; -} -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ diff --git a/sys/dev/raidframe/rf_interdecluster.h b/sys/dev/raidframe/rf_interdecluster.h deleted file mode 100644 index 9bf3825..0000000 --- a/sys/dev/raidframe/rf_interdecluster.h +++ /dev/null @@ -1,60 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_interdecluster.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_interdecluster.h - * header file for Interleaved Declustering - */ - -#ifndef _RF__RF_INTERDECLUSTER_H_ -#define _RF__RF_INTERDECLUSTER_H_ - -int -rf_ConfigureInterDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t * raidPtr); -void -rf_MapSectorInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAIDIDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_INTERDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_invertq.c b/sys/dev/raidframe/rf_invertq.c deleted file mode 100644 index 66337b6..0000000 --- a/sys/dev/raidframe/rf_invertq.c +++ /dev/null @@ -1,34 +0,0 @@ -/* $NetBSD: rf_invertq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_pqdeg.h> -#include <dev/raidframe/rf_invertq.h> diff --git a/sys/dev/raidframe/rf_invertq.h b/sys/dev/raidframe/rf_invertq.h deleted file mode 100644 index fde2cae..0000000 --- a/sys/dev/raidframe/rf_invertq.h +++ /dev/null @@ -1,64 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_invertq.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * rf_invertq.h - */ -/* - * This is normally a generated file. Not so for NetBSD. - */ - -#ifndef _RF__RF_INVERTQ_H_ -#define _RF__RF_INVERTQ_H_ - -/* - * rf_geniq.c must include rf_archs.h before including - * this file (to get VPATH magic right with the way we - * generate this file in kernel trees) - */ -/* #include <dev/raidframe/rf_archs.h> */ - -#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -#define RF_Q_COLS 32 -RF_ua32_t rf_rn = { -1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1,}; -RF_ua32_t rf_qfor[32] = { - /* i = 0 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,}, - /* i = 1 */ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 5, 7, 1, 3, 13, 15, 9, 11, 21, 23, 17, 19, 29, 31, 25, 27,}, - /* i = 2 */ {0, 4, 8, 12, 16, 20, 24, 28, 5, 1, 13, 9, 21, 17, 29, 25, 10, 14, 2, 6, 26, 30, 18, 22, 15, 11, 7, 3, 31, 27, 23, 19,}, - /* i = 3 */ {0, 8, 16, 24, 5, 13, 21, 29, 10, 2, 26, 18, 15, 7, 31, 23, 20, 28, 4, 12, 17, 25, 1, 9, 30, 22, 14, 6, 27, 19, 11, 3,}, - /* i = 4 */ {0, 16, 5, 21, 10, 26, 15, 31, 20, 4, 17, 1, 30, 14, 27, 11, 13, 29, 8, 24, 7, 23, 2, 18, 25, 9, 28, 12, 19, 3, 22, 6,}, - /* i = 5 */ {0, 5, 10, 15, 20, 17, 30, 27, 13, 8, 7, 2, 25, 28, 19, 22, 26, 31, 16, 21, 14, 11, 4, 1, 23, 18, 29, 24, 3, 6, 9, 12,}, - /* i = 6 */ {0, 10, 20, 30, 13, 7, 25, 19, 26, 16, 14, 4, 23, 29, 3, 9, 17, 27, 5, 15, 28, 22, 8, 2, 11, 1, 31, 21, 6, 12, 18, 24,}, - /* i = 7 */ {0, 20, 13, 25, 26, 14, 23, 3, 17, 5, 28, 8, 11, 31, 6, 18, 7, 19, 10, 30, 29, 9, 16, 4, 22, 2, 27, 15, 12, 24, 1, 21,}, - /* i = 8 */ {0, 13, 26, 23, 17, 28, 11, 6, 7, 10, 29, 16, 22, 27, 12, 1, 14, 3, 20, 25, 31, 18, 5, 8, 9, 4, 19, 30, 24, 21, 2, 15,}, - /* i = 9 */ {0, 26, 17, 11, 7, 29, 22, 12, 14, 20, 31, 5, 9, 19, 24, 2, 28, 6, 13, 23, 27, 1, 10, 16, 18, 8, 3, 25, 21, 15, 4, 30,}, - /* i = 10 */ {0, 17, 7, 22, 14, 31, 9, 24, 28, 13, 27, 10, 18, 3, 21, 4, 29, 12, 26, 11, 19, 2, 20, 5, 1, 16, 6, 23, 15, 30, 8, 25,}, - /* i = 11 */ {0, 7, 14, 9, 28, 27, 18, 21, 29, 26, 19, 20, 1, 6, 15, 8, 31, 24, 17, 22, 3, 4, 13, 10, 2, 5, 12, 11, 30, 25, 16, 23,}, - /* i = 12 */ {0, 14, 28, 18, 29, 19, 1, 15, 31, 17, 3, 13, 2, 12, 30, 16, 27, 21, 7, 9, 6, 8, 26, 20, 4, 10, 24, 22, 25, 23, 5, 11,}, - /* i = 13 */ {0, 28, 29, 1, 31, 3, 2, 30, 27, 7, 6, 26, 4, 24, 25, 5, 19, 15, 14, 18, 12, 16, 17, 13, 8, 20, 21, 9, 23, 11, 10, 22,}, - /* i = 14 */ {0, 29, 31, 2, 27, 6, 4, 25, 19, 14, 12, 17, 8, 21, 23, 10, 3, 30, 28, 1, 24, 5, 7, 26, 16, 13, 15, 18, 11, 22, 20, 9,}, - /* i = 15 */ {0, 31, 27, 4, 19, 12, 8, 23, 3, 28, 24, 7, 16, 15, 11, 20, 6, 25, 29, 2, 21, 10, 14, 17, 5, 26, 30, 1, 22, 9, 13, 18,}, - /* i = 16 */ {0, 27, 19, 8, 3, 24, 16, 11, 6, 29, 21, 14, 5, 30, 22, 13, 12, 23, 31, 4, 15, 20, 28, 7, 10, 17, 25, 2, 9, 18, 26, 1,}, - /* i = 17 */ {0, 19, 3, 16, 6, 21, 5, 22, 12, 31, 15, 28, 10, 25, 9, 26, 24, 11, 27, 8, 30, 13, 29, 14, 20, 7, 23, 4, 18, 1, 17, 2,}, - /* i = 18 */ {0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17, 21, 22, 19, 16, 25, 26, 31, 28, 13, 14, 11, 8, 1, 2, 7, 4,}, - /* i = 19 */ {0, 6, 12, 10, 24, 30, 20, 18, 21, 19, 25, 31, 13, 11, 1, 7, 15, 9, 3, 5, 23, 17, 27, 29, 26, 28, 22, 16, 2, 4, 14, 8,}, - /* i = 20 */ {0, 12, 24, 20, 21, 25, 13, 1, 15, 3, 23, 27, 26, 22, 2, 14, 30, 18, 6, 10, 11, 7, 19, 31, 17, 29, 9, 5, 4, 8, 28, 16,}, - /* i = 21 */ {0, 24, 21, 13, 15, 23, 26, 2, 30, 6, 11, 19, 17, 9, 4, 28, 25, 1, 12, 20, 22, 14, 3, 27, 7, 31, 18, 10, 8, 16, 29, 5,}, - /* i = 22 */ {0, 21, 15, 26, 30, 11, 17, 4, 25, 12, 22, 3, 7, 18, 8, 29, 23, 2, 24, 13, 9, 28, 6, 19, 14, 27, 1, 20, 16, 5, 31, 10,}, - /* i = 23 */ {0, 15, 30, 17, 25, 22, 7, 8, 23, 24, 9, 6, 14, 1, 16, 31, 11, 4, 21, 26, 18, 29, 12, 3, 28, 19, 2, 13, 5, 10, 27, 20,}, - /* i = 24 */ {0, 30, 25, 7, 23, 9, 14, 16, 11, 21, 18, 12, 28, 2, 5, 27, 22, 8, 15, 17, 1, 31, 24, 6, 29, 3, 4, 26, 10, 20, 19, 13,}, - /* i = 25 */ {0, 25, 23, 14, 11, 18, 28, 5, 22, 15, 1, 24, 29, 4, 10, 19, 9, 16, 30, 7, 2, 27, 21, 12, 31, 6, 8, 17, 20, 13, 3, 26,}, - /* i = 26 */ {0, 23, 11, 28, 22, 1, 29, 10, 9, 30, 2, 21, 31, 8, 20, 3, 18, 5, 25, 14, 4, 19, 15, 24, 27, 12, 16, 7, 13, 26, 6, 17,}, - /* i = 27 */ {0, 11, 22, 29, 9, 2, 31, 20, 18, 25, 4, 15, 27, 16, 13, 6, 1, 10, 23, 28, 8, 3, 30, 21, 19, 24, 5, 14, 26, 17, 12, 7,}, - /* i = 28 */ {0, 22, 9, 31, 18, 4, 27, 13, 1, 23, 8, 30, 19, 5, 26, 12, 2, 20, 11, 29, 16, 6, 25, 15, 3, 21, 10, 28, 17, 7, 24, 14,}, - /* i = 29 */ {0, 9, 18, 27, 1, 8, 19, 26, 2, 11, 16, 25, 3, 10, 17, 24, 4, 13, 22, 31, 5, 12, 23, 30, 6, 15, 20, 29, 7, 14, 21, 28,}, - /* i = 30 */ {0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29,}, - /* i = 31 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,}, -}; -#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)] -RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */ - -#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > - * 0) */ -#endif /* !_RF__RF_INVERTQ_H_ */ diff --git a/sys/dev/raidframe/rf_kintf.h b/sys/dev/raidframe/rf_kintf.h deleted file mode 100644 index ae2697b..0000000 --- a/sys/dev/raidframe/rf_kintf.h +++ /dev/null @@ -1,82 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_kintf.h,v 1.15 2000/10/20 02:24:45 oster Exp $ */ -/* - * rf_kintf.h - * - * RAIDframe exported kernel interface - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_KINTF_H_ -#define _RF__RF_KINTF_H_ - -#include <dev/raidframe/rf_types.h> - -#if defined(__NetBSD__) -#define RF_LTSLEEP(cond, pri, text, time, mutex) \ - ltsleep(cond, pri, text, time, mutex) -#elif defined(__FreeBSD__) -#if __FreeBSD_version > 500005 -#define RF_LTSLEEP(cond, pri, text, time, mutex) \ - msleep(cond, mutex, pri, text, time); -#else -static __inline int -RF_LTSLEEP(void *cond, int pri, const char *text, int time, struct simplelock *mutex) -{ - int ret; - if (mutex != NULL) - simple_unlock(mutex); - ret = tsleep(cond, pri, text, time); - if (mutex != NULL) - simple_lock(mutex); - return (ret); -} -#endif -#endif - -int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); - -void raidstart(RF_Raid_t * raidPtr); -int rf_DispatchKernelIO(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req); - -int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); -int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); - -#define RF_NORMAL_COMPONENT_UPDATE 0 -#define RF_FINAL_COMPONENT_UPDATE 1 -void rf_update_component_labels(RF_Raid_t *, int); -int raidlookup(char *, RF_Thread_t, struct vnode **); -int raidmarkclean(dev_t dev, struct vnode *b_vp, int); -int raidmarkdirty(dev_t dev, struct vnode *b_vp, int); -void raid_init_component_label(RF_Raid_t *, RF_ComponentLabel_t *); -void rf_print_component_label(RF_ComponentLabel_t *); -void rf_UnconfigureVnodes( RF_Raid_t * ); -void rf_close_component( RF_Raid_t *, struct vnode *, int); -void rf_disk_unbusy(RF_RaidAccessDesc_t *); -int raid_getcomponentsize(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); -#endif /* _RF__RF_KINTF_H_ */ diff --git a/sys/dev/raidframe/rf_layout.c b/sys/dev/raidframe/rf_layout.c deleted file mode 100644 index 53badbd..0000000 --- a/sys/dev/raidframe/rf_layout.c +++ /dev/null @@ -1,492 +0,0 @@ -/* $NetBSD: rf_layout.c,v 1.9 2001/01/27 19:34:43 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_layout.c -- driver code dealing with layout and mapping issues - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_pq.h> -#include <dev/raidframe/rf_declusterPQ.h> -#include <dev/raidframe/rf_raid0.h> -#include <dev/raidframe/rf_raid1.h> -#include <dev/raidframe/rf_raid4.h> -#include <dev/raidframe/rf_raid5.h> -#include <dev/raidframe/rf_states.h> -#if RF_INCLUDE_RAID5_RS > 0 -#include <dev/raidframe/rf_raid5_rotatedspare.h> -#endif /* RF_INCLUDE_RAID5_RS > 0 */ -#if RF_INCLUDE_CHAINDECLUSTER > 0 -#include <dev/raidframe/rf_chaindecluster.h> -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ -#if RF_INCLUDE_INTERDECLUSTER > 0 -#include <dev/raidframe/rf_interdecluster.h> -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ -#if RF_INCLUDE_PARITYLOGGING > 0 -#include <dev/raidframe/rf_paritylogging.h> -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ -#if RF_INCLUDE_EVENODD > 0 -#include <dev/raidframe/rf_evenodd.h> -#endif /* RF_INCLUDE_EVENODD > 0 */ -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_reconutil.h> - -/*********************************************************************** - * - * the layout switch defines all the layouts that are supported. - * fields are: layout ID, init routine, shutdown routine, map - * sector, map parity, identify stripe, dag selection, map stripeid - * to parity stripe id (optional), num faults tolerated, special - * flags. - * - ***********************************************************************/ - -static RF_AccessState_t DefaultStates[] = {rf_QuiesceState, - rf_IncrAccessesCountState, - rf_MapState, - rf_LockState, - rf_CreateDAGState, - rf_ExecuteDAGState, - rf_ProcessDAGState, - rf_DecrAccessesCountState, - rf_CleanupState, - rf_LastState}; - -#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p - -/* Note that if you add any new RAID types to this list, that you must - also update the mapsw[] table in the raidctl sources */ - -static RF_LayoutSW_t mapsw[] = { -#if RF_INCLUDE_PARITY_DECLUSTERING > 0 - /* parity declustering */ - {'T', "Parity declustering", - RF_NU( - rf_ConfigureDeclustered, - rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, - rf_IdentifyStripeDeclustered, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersDeclustered, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif - -#if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 - /* parity declustering with distributed sparing */ - {'D', "Distributed sparing parity declustering", - RF_NU( - rf_ConfigureDeclusteredDS, - rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, - rf_IdentifyStripeDeclustered, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersDeclustered, - rf_GetNumSpareRUsDeclustered, rf_InstallSpareTable, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE | RF_BD_DECLUSTERED) - }, -#endif - -#if RF_INCLUDE_DECL_PQ > 0 - /* declustered P+Q */ - {'Q', "Declustered P+Q", - RF_NU( - rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, - rf_IdentifyStripeDeclusteredPQ, - rf_PQDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersPQ, - NULL, NULL, - NULL, - rf_VerifyParityBasic, - 2, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_DECL_PQ > 0 */ - -#if RF_INCLUDE_RAID5_RS > 0 - /* RAID 5 with rotated sparing */ - {'R', "RAID Level 5 rotated sparing", - RF_NU( - rf_ConfigureRAID5_RS, - rf_MapSectorRAID5_RS, rf_MapParityRAID5_RS, NULL, - rf_IdentifyStripeRAID5_RS, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID5_RS, - rf_GetDefaultHeadSepLimitRAID5, - rf_GetDefaultNumFloatingReconBuffersRAID5, - rf_GetNumSpareRUsRAID5_RS, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE) - }, -#endif /* RF_INCLUDE_RAID5_RS > 0 */ - -#if RF_INCLUDE_CHAINDECLUSTER > 0 - /* Chained Declustering */ - {'C', "Chained Declustering", - RF_NU( - rf_ConfigureChainDecluster, - rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL, - rf_IdentifyStripeChainDecluster, - rf_RAIDCDagSelect, - rf_MapSIDToPSIDChainDecluster, - NULL, - NULL, - rf_GetNumSpareRUsChainDecluster, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ - -#if RF_INCLUDE_INTERDECLUSTER > 0 - /* Interleaved Declustering */ - {'I', "Interleaved Declustering", - RF_NU( - rf_ConfigureInterDecluster, - rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL, - rf_IdentifyStripeInterDecluster, - rf_RAIDIDagSelect, - rf_MapSIDToPSIDInterDecluster, - rf_GetDefaultHeadSepLimitInterDecluster, - rf_GetDefaultNumFloatingReconBuffersInterDecluster, - rf_GetNumSpareRUsInterDecluster, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE) - }, -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ - -#if RF_INCLUDE_RAID0 > 0 - /* RAID level 0 */ - {'0', "RAID Level 0", - RF_NU( - rf_ConfigureRAID0, - rf_MapSectorRAID0, rf_MapParityRAID0, NULL, - rf_IdentifyStripeRAID0, - rf_RAID0DagSelect, - rf_MapSIDToPSIDRAID0, - NULL, - NULL, - NULL, NULL, - NULL, - rf_VerifyParityRAID0, - 0, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID0 > 0 */ - -#if RF_INCLUDE_RAID1 > 0 - /* RAID level 1 */ - {'1', "RAID Level 1", - RF_NU( - rf_ConfigureRAID1, - rf_MapSectorRAID1, rf_MapParityRAID1, NULL, - rf_IdentifyStripeRAID1, - rf_RAID1DagSelect, - rf_MapSIDToPSIDRAID1, - NULL, - NULL, - NULL, NULL, - rf_SubmitReconBufferRAID1, - rf_VerifyParityRAID1, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID1 > 0 */ - -#if RF_INCLUDE_RAID4 > 0 - /* RAID level 4 */ - {'4', "RAID Level 4", - RF_NU( - rf_ConfigureRAID4, - rf_MapSectorRAID4, rf_MapParityRAID4, NULL, - rf_IdentifyStripeRAID4, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID4, - rf_GetDefaultHeadSepLimitRAID4, - rf_GetDefaultNumFloatingReconBuffersRAID4, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID4 > 0 */ - -#if RF_INCLUDE_RAID5 > 0 - /* RAID level 5 */ - {'5', "RAID Level 5", - RF_NU( - rf_ConfigureRAID5, - rf_MapSectorRAID5, rf_MapParityRAID5, NULL, - rf_IdentifyStripeRAID5, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID5, - rf_GetDefaultHeadSepLimitRAID5, - rf_GetDefaultNumFloatingReconBuffersRAID5, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_RAID5 > 0 */ - -#if RF_INCLUDE_EVENODD > 0 - /* Evenodd */ - {'E', "EvenOdd", - RF_NU( - rf_ConfigureEvenOdd, - rf_MapSectorRAID5, rf_MapParityEvenOdd, rf_MapEEvenOdd, - rf_IdentifyStripeEvenOdd, - rf_EODagSelect, - rf_MapSIDToPSIDRAID5, - NULL, - NULL, - NULL, NULL, - NULL, /* no reconstruction, yet */ - rf_VerifyParityEvenOdd, - 2, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_EVENODD > 0 */ - -#if RF_INCLUDE_EVENODD > 0 - /* Declustered Evenodd */ - {'e', "Declustered EvenOdd", - RF_NU( - rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, - rf_IdentifyStripeDeclusteredPQ, - rf_EODagSelect, - rf_MapSIDToPSIDRAID5, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersPQ, - NULL, NULL, - NULL, /* no reconstruction, yet */ - rf_VerifyParityEvenOdd, - 2, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_EVENODD > 0 */ - -#if RF_INCLUDE_PARITYLOGGING > 0 - /* parity logging */ - {'L', "Parity logging", - RF_NU( - rf_ConfigureParityLogging, - rf_MapSectorParityLogging, rf_MapParityParityLogging, NULL, - rf_IdentifyStripeParityLogging, - rf_ParityLoggingDagSelect, - rf_MapSIDToPSIDParityLogging, - rf_GetDefaultHeadSepLimitParityLogging, - rf_GetDefaultNumFloatingReconBuffersParityLogging, - NULL, NULL, - rf_SubmitReconBufferBasic, - NULL, - 1, - DefaultStates, - 0) - }, -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - - /* end-of-list marker */ - {'\0', NULL, - RF_NU( - NULL, - NULL, NULL, NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, NULL, - NULL, - NULL, - 0, - NULL, - 0) - } -}; - -RF_LayoutSW_t * -rf_GetLayout(RF_ParityConfig_t parityConfig) -{ - RF_LayoutSW_t *p; - - /* look up the specific layout */ - for (p = &mapsw[0]; p->parityConfig; p++) - if (p->parityConfig == parityConfig) - break; - if (!p->parityConfig) - return (NULL); - RF_ASSERT(p->parityConfig == parityConfig); - return (p); -} - -/***************************************************************************** - * - * ConfigureLayout -- - * - * read the configuration file and set up the RAID layout parameters. - * After reading common params, invokes the layout-specific - * configuration routine to finish the configuration. - * - ****************************************************************************/ -int -rf_ConfigureLayout( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_ParityConfig_t parityConfig; - RF_LayoutSW_t *p; - int retval; - - layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU; - layoutPtr->SUsPerPU = cfgPtr->SUsPerPU; - layoutPtr->SUsPerRU = cfgPtr->SUsPerRU; - parityConfig = cfgPtr->parityConfig; - - if (layoutPtr->sectorsPerStripeUnit <= 0) { - RF_ERRORMSG2("raid%d: Invalid sectorsPerStripeUnit: %d\n", - raidPtr->raidid, - (int)layoutPtr->sectorsPerStripeUnit ); - return (EINVAL); - } - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; - - p = rf_GetLayout(parityConfig); - if (p == NULL) { - RF_ERRORMSG1("Unknown parity configuration '%c'", parityConfig); - return (EINVAL); - } - RF_ASSERT(p->parityConfig == parityConfig); - layoutPtr->map = p; - - /* initialize the specific layout */ - - retval = (p->Configure) (listp, raidPtr, cfgPtr); - - if (retval) - return (retval); - - layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << raidPtr->logBytesPerSector; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - if (rf_forceNumFloatingReconBufs >= 0) { - raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs; - } else { - raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr); - } - - if (rf_forceHeadSepLimit >= 0) { - raidPtr->headSepLimit = rf_forceHeadSepLimit; - } else { - raidPtr->headSepLimit = rf_GetDefaultHeadSepLimit(raidPtr); - } - - printf("RAIDFRAME: Configure (%s): total number of sectors is %lu (%lu MB)\n", - layoutPtr->map->configName, - (unsigned long) raidPtr->totalSectors, - (unsigned long) (raidPtr->totalSectors / 1024 * (1 << raidPtr->logBytesPerSector) / 1024)); - if (raidPtr->headSepLimit >= 0) { - printf("RAIDFRAME(%s): Using %ld floating recon bufs with head sep limit %ld\n", - layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs, (long) raidPtr->headSepLimit); - } else { - printf("RAIDFRAME(%s): Using %ld floating recon bufs with no head sep limit\n", - layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs); - } - - return (0); -} -/* typically there is a 1-1 mapping between stripes and parity stripes. - * however, the declustering code supports packing multiple stripes into - * a single parity stripe, so as to increase the size of the reconstruction - * unit without affecting the size of the stripe unit. This routine finds - * the parity stripe identifier associated with a stripe ID. There is also - * a RaidAddressToParityStripeID macro in layout.h - */ -RF_StripeNum_t -rf_MapStripeIDToParityStripeID(layoutPtr, stripeID, which_ru) - RF_RaidLayout_t *layoutPtr; - RF_StripeNum_t stripeID; - RF_ReconUnitNum_t *which_ru; -{ - RF_StripeNum_t parityStripeID; - - /* quick exit in the common case of SUsPerPU==1 */ - if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) { - *which_ru = 0; - return (stripeID); - } else { - (layoutPtr->map->MapSIDToPSID) (layoutPtr, stripeID, &parityStripeID, which_ru); - } - return (parityStripeID); -} diff --git a/sys/dev/raidframe/rf_layout.h b/sys/dev/raidframe/rf_layout.h deleted file mode 100644 index 2482556..0000000 --- a/sys/dev/raidframe/rf_layout.h +++ /dev/null @@ -1,349 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_layout.h,v 1.5 2001/01/26 04:14:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_layout.h -- header file defining layout data structures - */ - -#ifndef _RF__RF_LAYOUT_H_ -#define _RF__RF_LAYOUT_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_alloclist.h> - -#ifndef _KERNEL -#include <stdio.h> -#endif - -/***************************************************************************************** - * - * This structure identifies all layout-specific operations and parameters. - * - ****************************************************************************************/ - -typedef struct RF_LayoutSW_s { - RF_ParityConfig_t parityConfig; - const char *configName; - -#ifndef _KERNEL - /* layout-specific parsing */ - int (*MakeLayoutSpecific) (FILE * fp, RF_Config_t * cfgPtr, void *arg); - void *makeLayoutSpecificArg; -#endif /* !KERNEL */ - -#if RF_UTILITY == 0 - /* initialization routine */ - int (*Configure) (RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); - - /* routine to map RAID sector address -> physical (row, col, offset) */ - void (*MapSector) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of parity - * unit */ - void (*MapParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of Q unit */ - void (*MapQ) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t * row, - RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to identify the disks comprising a stripe */ - void (*IdentifyStripe) (RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); - - /* routine to select a dag */ - void (*SelectionFunc) (RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr *); -#if 0 - void (**createFunc) (RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, - RF_RaidAccessFlags_t, - RF_AllocListElem_t *); - -#endif - - /* map a stripe ID to a parity stripe ID. This is typically the - * identity mapping */ - void (*MapSIDToPSID) (RF_RaidLayout_t * layoutPtr, RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, RF_ReconUnitNum_t * which_ru); - - /* get default head separation limit (may be NULL) */ - RF_HeadSepLimit_t(*GetDefaultHeadSepLimit) (RF_Raid_t * raidPtr); - - /* get default num recon buffers (may be NULL) */ - int (*GetDefaultNumFloatingReconBuffers) (RF_Raid_t * raidPtr); - - /* get number of spare recon units (may be NULL) */ - RF_ReconUnitCount_t(*GetNumSpareRUs) (RF_Raid_t * raidPtr); - - /* spare table installation (may be NULL) */ - int (*InstallSpareTable) (RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); - - /* recon buffer submission function */ - int (*SubmitReconBuffer) (RF_ReconBuffer_t * rbuf, int keep_it, - int use_committed); - - /* - * verify that parity information for a stripe is correct - * see rf_parityscan.h for return vals - */ - int (*VerifyParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); - - /* number of faults tolerated by this mapping */ - int faultsTolerated; - - /* states to step through in an access. Must end with "LastState". The - * default is DefaultStates in rf_layout.c */ - RF_AccessState_t *states; - - RF_AccessStripeMapFlags_t flags; -#endif /* RF_UTILITY == 0 */ -} RF_LayoutSW_t; -/* enables remapping to spare location under dist sparing */ -#define RF_REMAP 1 -#define RF_DONT_REMAP 0 - -/* - * Flags values for RF_AccessStripeMapFlags_t - */ -#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */ -#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs - * that support it */ -#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */ - -/************************************************************************* - * - * this structure forms the layout component of the main Raid - * structure. It describes everything needed to define and perform - * the mapping of logical RAID addresses <-> physical disk addresses. - * - *************************************************************************/ -struct RF_RaidLayout_s { - /* configuration parameters */ - RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one - * stripe unit */ - RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */ - RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction - * unit */ - - /* redundant-but-useful info computed from the above, used in all - * layouts */ - RF_StripeCount_t numStripe; /* total number of stripes in the - * array */ - RF_SectorCount_t dataSectorsPerStripe; - RF_StripeCount_t dataStripeUnitsPerDisk; - u_int bytesPerStripeUnit; - u_int dataBytesPerStripe; - RF_StripeCount_t numDataCol; /* number of SUs of data per stripe - * (name here is a la RAID4) */ - RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. - * Always 1 for now */ - RF_StripeCount_t numParityLogCol; /* number of SUs of parity log - * per stripe. Always 1 for - * now */ - RF_StripeCount_t stripeUnitsPerDisk; - - RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and - * information */ - void *layoutSpecificInfo; /* ptr to a structure holding - * layout-specific params */ -}; -/***************************************************************************************** - * - * The mapping code returns a pointer to a list of AccessStripeMap structures, which - * describes all the mapping information about an access. The list contains one - * AccessStripeMap structure per stripe touched by the access. Each element in the list - * contains a stripe identifier and a pointer to a list of PhysDiskAddr structuress. Each - * element in this latter list describes the physical location of a stripe unit accessed - * within the corresponding stripe. - * - ****************************************************************************************/ - -#define RF_PDA_TYPE_DATA 0 -#define RF_PDA_TYPE_PARITY 1 -#define RF_PDA_TYPE_Q 2 - -struct RF_PhysDiskAddr_s { - RF_RowCol_t row, col; /* disk identifier */ - RF_SectorNum_t startSector; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors accessed */ - int type; /* used by higher levels: currently, data, - * parity, or q */ - caddr_t bufPtr; /* pointer to buffer supplying/receiving data */ - RF_RaidAddr_t raidAddress; /* raid address corresponding to this - * physical disk address */ - RF_PhysDiskAddr_t *next; -}; -#define RF_MAX_FAILED_PDA RF_MAXCOL - -struct RF_AccessStripeMap_s { - RF_StripeNum_t stripeID;/* the stripe index */ - RF_RaidAddr_t raidAddress; /* the starting raid address within - * this stripe */ - RF_RaidAddr_t endRaidAddress; /* raid address one sector past the - * end of the access */ - RF_SectorCount_t totalSectorsAccessed; /* total num sectors - * identified in physInfo list */ - RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in - * physInfo list */ - int numDataFailed; /* number of failed data disks accessed */ - int numParityFailed;/* number of failed parity disks accessed (0 - * or 1) */ - int numQFailed; /* number of failed Q units accessed (0 or 1) */ - RF_AccessStripeMapFlags_t flags; /* various flags */ -#if 0 - RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */ - RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA - * that has failed, if any */ -#else - int numFailedPDAs; /* number of failed phys addrs */ - RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys - * addrs */ -#endif - RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */ - RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the - * parity (P of P + Q ) */ - RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of - * P + Q */ - RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */ - RF_RowCol_t origRow; /* the original row: we may redirect the acc - * to a different row */ - RF_AccessStripeMap_t *next; -}; -/* flag values */ -#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* allows large-write creation - * code to redirect failed - * accs */ -#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* allows us to detect - * recursive calls to the - * bailout write dag */ -#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* we've acquired the lock on - * the first parity range in - * this parity stripe */ -#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* we've acquired the lock on - * the 2nd parity range in - * this parity stripe */ -#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* we've done the force-recon - * call on this parity stripe */ -#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* we blocked recon => we must - * unblock it later */ - -struct RF_AccessStripeMapHeader_s { - RF_StripeCount_t numStripes; /* total number of stripes touched by - * this acc */ - RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. - * Also used for making lists */ - RF_AccessStripeMapHeader_t *next; -}; -/***************************************************************************************** - * - * various routines mapping addresses in the RAID address space. These work across - * all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE. - * - ****************************************************************************************/ - -/* return the identifier of the stripe containing the given address */ -#define rf_RaidAddressToStripeID(_layoutPtr_, _addr_) \ - ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) / (_layoutPtr_)->numDataCol ) - -/* return the raid address of the start of the indicates stripe ID */ -#define rf_StripeIDToRaidAddress(_layoutPtr_, _sid_) \ - ( ((_sid_) * (_layoutPtr_)->sectorsPerStripeUnit) * (_layoutPtr_)->numDataCol ) - -/* return the identifier of the stripe containing the given stripe unit id */ -#define rf_StripeUnitIDToStripeID(_layoutPtr_, _addr_) \ - ( (_addr_) / (_layoutPtr_)->numDataCol ) - -/* return the identifier of the stripe unit containing the given address */ -#define rf_RaidAddressToStripeUnitID(_layoutPtr_, _addr_) \ - ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) ) - -/* return the RAID address of next stripe boundary beyond the given address */ -#define rf_RaidAddressOfNextStripeBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+1) * (_layoutPtr_)->dataSectorsPerStripe ) - -/* return the RAID address of the start of the stripe containing the given address */ -#define rf_RaidAddressOfPrevStripeBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+0) * (_layoutPtr_)->dataSectorsPerStripe ) - -/* return the RAID address of next stripe unit boundary beyond the given address */ -#define rf_RaidAddressOfNextStripeUnitBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+1L)*(_layoutPtr_)->sectorsPerStripeUnit ) - -/* return the RAID address of the start of the stripe unit containing RAID address _addr_ */ -#define rf_RaidAddressOfPrevStripeUnitBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+0)*(_layoutPtr_)->sectorsPerStripeUnit ) - -/* returns the offset into the stripe. used by RaidAddressStripeAligned */ -#define rf_RaidAddressStripeOffset(_layoutPtr_, _addr_) \ - ( (_addr_) % ((_layoutPtr_)->dataSectorsPerStripe) ) - -/* returns the offset into the stripe unit. */ -#define rf_StripeUnitOffset(_layoutPtr_, _addr_) \ - ( (_addr_) % ((_layoutPtr_)->sectorsPerStripeUnit) ) - -/* returns nonzero if the given RAID address is stripe-aligned */ -#define rf_RaidAddressStripeAligned( __layoutPtr__, __addr__ ) \ - ( rf_RaidAddressStripeOffset(__layoutPtr__, __addr__) == 0 ) - -/* returns nonzero if the given address is stripe-unit aligned */ -#define rf_StripeUnitAligned( __layoutPtr__, __addr__ ) \ - ( rf_StripeUnitOffset(__layoutPtr__, __addr__) == 0 ) - -/* convert an address expressed in RAID blocks to/from an addr expressed in bytes */ -#define rf_RaidAddressToByte(_raidPtr_, _addr_) \ - ( (_addr_) << ( (_raidPtr_)->logBytesPerSector ) ) - -#define rf_ByteToRaidAddress(_raidPtr_, _addr_) \ - ( (_addr_) >> ( (_raidPtr_)->logBytesPerSector ) ) - -/* convert a raid address to/from a parity stripe ID. Conversion to raid address is easy, - * since we're asking for the address of the first sector in the parity stripe. Conversion to a - * parity stripe ID is more complex, since stripes are not contiguously allocated in - * parity stripes. - */ -#define rf_RaidAddressToParityStripeID(_layoutPtr_, _addr_, _ru_num_) \ - rf_MapStripeIDToParityStripeID( (_layoutPtr_), rf_RaidAddressToStripeID( (_layoutPtr_), (_addr_) ), (_ru_num_) ) - -#define rf_ParityStripeIDToRaidAddress(_layoutPtr_, _psid_) \ - ( (_psid_) * (_layoutPtr_)->SUsPerPU * (_layoutPtr_)->numDataCol * (_layoutPtr_)->sectorsPerStripeUnit ) - -RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig); -int -rf_ConfigureLayout(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_StripeNum_t -rf_MapStripeIDToParityStripeID(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_ReconUnitNum_t * which_ru); - -#endif /* !_RF__RF_LAYOUT_H_ */ diff --git a/sys/dev/raidframe/rf_map.c b/sys/dev/raidframe/rf_map.c deleted file mode 100644 index 22af549..0000000 --- a/sys/dev/raidframe/rf_map.c +++ /dev/null @@ -1,909 +0,0 @@ -/* $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************** - * - * map.c -- main code for mapping RAID addresses to physical disk addresses - * - **************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_shutdown.h> - -static void rf_FreePDAList(RF_PhysDiskAddr_t * start, RF_PhysDiskAddr_t * end, int count); -static void -rf_FreeASMList(RF_AccessStripeMap_t * start, RF_AccessStripeMap_t * end, - int count); - -/***************************************************************************************** - * - * MapAccess -- main 1st order mapping routine. - * - * Maps an access in the RAID address space to the corresponding set of physical disk - * addresses. The result is returned as a list of AccessStripeMap structures, one per - * stripe accessed. Each ASM structure contains a pointer to a list of PhysDiskAddr - * structures, which describe the physical locations touched by the user access. Note - * that this routine returns only static mapping information, i.e. the list of physical - * addresses returned does not necessarily identify the set of physical locations that - * will actually be read or written. - * - * The routine also maps the parity. The physical disk location returned always - * indicates the entire parity unit, even when only a subset of it is being accessed. - * This is because an access that is not stripe unit aligned but that spans a stripe - * unit boundary may require access two distinct portions of the parity unit, and we - * can't yet tell which portion(s) we'll actually need. We leave it up to the algorithm - * selection code to decide what subset of the parity unit to access. - * - * Note that addresses in the RAID address space must always be maintained as - * longs, instead of ints. - * - * This routine returns NULL if numBlocks is 0 - * - ****************************************************************************************/ - -RF_AccessStripeMapHeader_t * -rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddress; /* starting address in RAID address - * space */ - RF_SectorCount_t numBlocks; /* number of blocks in RAID address - * space to access */ - caddr_t buffer; /* buffer to supply/receive data */ - int remap; /* 1 => remap addresses to spare space */ -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_AccessStripeMapHeader_t *asm_hdr = NULL; - RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL; - int faultsTolerated = layoutPtr->map->faultsTolerated; - RF_RaidAddr_t startAddress = raidAddress; /* we'll change - * raidAddress along the - * way */ - RF_RaidAddr_t endAddress = raidAddress + numBlocks; - RF_RaidDisk_t **disks = raidPtr->Disks; - - RF_PhysDiskAddr_t *pda_p, *pda_q; - RF_StripeCount_t numStripes = 0; - RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress; - RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr; - RF_StripeCount_t totStripes; - RF_StripeNum_t stripeID, lastSID, SUID, lastSUID; - RF_AccessStripeMap_t *asmList, *t_asm; - RF_PhysDiskAddr_t *pdaList, *t_pda; - - /* allocate all the ASMs and PDAs up front */ - lastRaidAddr = raidAddress + numBlocks - 1; - stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress); - lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr); - totStripes = lastSID - stripeID + 1; - SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress); - lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr); - - asmList = rf_AllocASMList(totStripes); - pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) - * per stripe for parity */ - - if (raidAddress + numBlocks > raidPtr->totalSectors) { - RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n", - (int) raidAddress); - return (NULL); - } - if (rf_mapDebug) - rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks); - for (; raidAddress < endAddress;) { - /* make the next stripe structure */ - RF_ASSERT(asmList); - t_asm = asmList; - asmList = asmList->next; - bzero((char *) t_asm, sizeof(RF_AccessStripeMap_t)); - if (!asm_p) - asm_list = asm_p = t_asm; - else { - asm_p->next = t_asm; - asm_p = asm_p->next; - } - numStripes++; - - /* map SUs from current location to the end of the stripe */ - asm_p->stripeID = /* rf_RaidAddressToStripeID(layoutPtr, - raidAddress) */ stripeID++; - stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); - stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress); - asm_p->raidAddress = raidAddress; - asm_p->endRaidAddress = stripeEndAddress; - - /* map each stripe unit in the stripe */ - pda_p = NULL; - startAddrWithinStripe = raidAddress; /* Raid addr of start of - * portion of access - * that is within this - * stripe */ - for (; raidAddress < stripeEndAddress;) { - RF_ASSERT(pdaList); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - if (!pda_p) - asm_p->physInfo = pda_p = t_pda; - else { - pda_p->next = t_pda; - pda_p = pda_p->next; - } - - pda_p->type = RF_PDA_TYPE_DATA; - (layoutPtr->map->MapSector) (raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - - /* mark any failures we find. failedPDA is don't-care - * if there is more than one failure */ - pda_p->raidAddress = raidAddress; /* the RAID address - * corresponding to this - * physical disk address */ - nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress); - pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress; - RF_ASSERT(pda_p->numSector != 0); - rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0); - pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress)); - asm_p->totalSectorsAccessed += pda_p->numSector; - asm_p->numStripeUnitsAccessed++; - asm_p->origRow = pda_p->row; /* redundant but - * harmless to do this - * in every loop - * iteration */ - - raidAddress = RF_MIN(endAddress, nextStripeUnitAddress); - } - - /* Map the parity. At this stage, the startSector and - * numSector fields for the parity unit are always set to - * indicate the entire parity unit. We may modify this after - * mapping the data portion. */ - switch (faultsTolerated) { - case 0: - break; - case 1: /* single fault tolerant */ - RF_ASSERT(pdaList); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_p = asm_p->parityInfo = t_pda; - pda_p->type = RF_PDA_TYPE_PARITY; - (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); - rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); - - break; - case 2: /* two fault tolerant */ - RF_ASSERT(pdaList && pdaList->next); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_p = asm_p->parityInfo = t_pda; - pda_p->type = RF_PDA_TYPE_PARITY; - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_q = asm_p->qInfo = t_pda; - pda_q->type = RF_PDA_TYPE_Q; - (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - (layoutPtr->map->MapQ) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap); - pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - /* failure mode stuff */ - rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); - rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1); - rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); - rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); - break; - } - } - RF_ASSERT(asmList == NULL && pdaList == NULL); - /* make the header structure */ - asm_hdr = rf_AllocAccessStripeMapHeader(); - RF_ASSERT(numStripes == totStripes); - asm_hdr->numStripes = numStripes; - asm_hdr->stripeMap = asm_list; - - if (rf_mapDebug) - rf_PrintAccessStripeMap(asm_hdr); - return (asm_hdr); -} -/***************************************************************************************** - * This routine walks through an ASM list and marks the PDAs that have failed. - * It's called only when a disk failure causes an in-flight DAG to fail. - * The parity may consist of two components, but we want to use only one failedPDA - * pointer. Thus we set failedPDA to point to the first parity component, and rely - * on the rest of the code to do the right thing with this. - ****************************************************************************************/ - -void -rf_MarkFailuresInASMList(raidPtr, asm_h) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asm_h; -{ - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_AccessStripeMap_t *asmap; - RF_PhysDiskAddr_t *pda; - - for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) { - asmap->numDataFailed = asmap->numParityFailed = asmap->numQFailed = 0; - asmap->numFailedPDAs = 0; - bzero((char *) asmap->failedPDAs, - RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *)); - for (pda = asmap->physInfo; pda; pda = pda->next) { - if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numDataFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - } - pda = asmap->parityInfo; - if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numParityFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - pda = asmap->qInfo; - if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numQFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - } -} -/***************************************************************************************** - * - * DuplicateASM -- duplicates an ASM and returns the new one - * - ****************************************************************************************/ -RF_AccessStripeMap_t * -rf_DuplicateASM(asmap) - RF_AccessStripeMap_t *asmap; -{ - RF_AccessStripeMap_t *new_asm; - RF_PhysDiskAddr_t *pda, *new_pda, *t_pda; - - new_pda = NULL; - new_asm = rf_AllocAccessStripeMapComponent(); - bcopy((char *) asmap, (char *) new_asm, sizeof(RF_AccessStripeMap_t)); - new_asm->numFailedPDAs = 0; /* ??? */ - new_asm->failedPDAs[0] = NULL; - new_asm->physInfo = NULL; - new_asm->parityInfo = NULL; - new_asm->next = NULL; - - for (pda = asmap->physInfo; pda; pda = pda->next) { /* copy the physInfo - * list */ - t_pda = rf_AllocPhysDiskAddr(); - bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - t_pda->next = NULL; - if (!new_asm->physInfo) { - new_asm->physInfo = t_pda; - new_pda = t_pda; - } else { - new_pda->next = t_pda; - new_pda = new_pda->next; - } - if (pda == asmap->failedPDAs[0]) - new_asm->failedPDAs[0] = t_pda; - } - for (pda = asmap->parityInfo; pda; pda = pda->next) { /* copy the parityInfo - * list */ - t_pda = rf_AllocPhysDiskAddr(); - bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); - t_pda->next = NULL; - if (!new_asm->parityInfo) { - new_asm->parityInfo = t_pda; - new_pda = t_pda; - } else { - new_pda->next = t_pda; - new_pda = new_pda->next; - } - if (pda == asmap->failedPDAs[0]) - new_asm->failedPDAs[0] = t_pda; - } - return (new_asm); -} -/***************************************************************************************** - * - * DuplicatePDA -- duplicates a PDA and returns the new one - * - ****************************************************************************************/ -RF_PhysDiskAddr_t * -rf_DuplicatePDA(pda) - RF_PhysDiskAddr_t *pda; -{ - RF_PhysDiskAddr_t *new; - - new = rf_AllocPhysDiskAddr(); - bcopy((char *) pda, (char *) new, sizeof(RF_PhysDiskAddr_t)); - return (new); -} -/***************************************************************************************** - * - * routines to allocate and free list elements. All allocation routines zero the - * structure before returning it. - * - * FreePhysDiskAddr is static. It should never be called directly, because - * FreeAccessStripeMap takes care of freeing the PhysDiskAddr list. - * - ****************************************************************************************/ - -static RF_FreeList_t *rf_asmhdr_freelist; -#define RF_MAX_FREE_ASMHDR 128 -#define RF_ASMHDR_INC 16 -#define RF_ASMHDR_INITIAL 32 - -static RF_FreeList_t *rf_asm_freelist; -#define RF_MAX_FREE_ASM 192 -#define RF_ASM_INC 24 -#define RF_ASM_INITIAL 64 - -static RF_FreeList_t *rf_pda_freelist; -#define RF_MAX_FREE_PDA 192 -#define RF_PDA_INC 24 -#define RF_PDA_INITIAL 64 - -/* called at shutdown time. So far, all that is necessary is to release all the free lists */ -static void rf_ShutdownMapModule(void *); -static void -rf_ShutdownMapModule(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); - RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); - RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *)); -} - -int -rf_ConfigureMapModule(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR, - RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t)); - if (rf_asmhdr_freelist == NULL) { - return (ENOMEM); - } - RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM, - RF_ASM_INC, sizeof(RF_AccessStripeMap_t)); - if (rf_asm_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); - return (ENOMEM); - } - RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, - RF_PDA_INC, sizeof(RF_PhysDiskAddr_t)); - if (rf_pda_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); - RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); - return (ENOMEM); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownMapModule(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next, - (RF_AccessStripeMapHeader_t *)); - RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next, - (RF_AccessStripeMap_t *)); - RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next, - (RF_PhysDiskAddr_t *)); - - return (0); -} - -RF_AccessStripeMapHeader_t * -rf_AllocAccessStripeMapHeader() -{ - RF_AccessStripeMapHeader_t *p; - - RF_FREELIST_GET(rf_asmhdr_freelist, p, next, (RF_AccessStripeMapHeader_t *)); - bzero((char *) p, sizeof(RF_AccessStripeMapHeader_t)); - - return (p); -} - - -void -rf_FreeAccessStripeMapHeader(p) - RF_AccessStripeMapHeader_t *p; -{ - RF_FREELIST_FREE(rf_asmhdr_freelist, p, next); -} - -RF_PhysDiskAddr_t * -rf_AllocPhysDiskAddr() -{ - RF_PhysDiskAddr_t *p; - - RF_FREELIST_GET(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *)); - bzero((char *) p, sizeof(RF_PhysDiskAddr_t)); - - return (p); -} -/* allocates a list of PDAs, locking the free list only once - * when we have to call calloc, we do it one component at a time to simplify - * the process of freeing the list at program shutdown. This should not be - * much of a performance hit, because it should be very infrequently executed. - */ -RF_PhysDiskAddr_t * -rf_AllocPDAList(count) - int count; -{ - RF_PhysDiskAddr_t *p = NULL; - - RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *), count); - return (p); -} - -void -rf_FreePhysDiskAddr(p) - RF_PhysDiskAddr_t *p; -{ - RF_FREELIST_FREE(rf_pda_freelist, p, next); -} - -static void -rf_FreePDAList(l_start, l_end, count) - RF_PhysDiskAddr_t *l_start, *l_end; /* pointers to start and end - * of list */ - int count; /* number of elements in list */ -{ - RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next, (RF_PhysDiskAddr_t *), count); -} - -RF_AccessStripeMap_t * -rf_AllocAccessStripeMapComponent() -{ - RF_AccessStripeMap_t *p; - - RF_FREELIST_GET(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *)); - bzero((char *) p, sizeof(RF_AccessStripeMap_t)); - - return (p); -} -/* this is essentially identical to AllocPDAList. I should combine the two. - * when we have to call calloc, we do it one component at a time to simplify - * the process of freeing the list at program shutdown. This should not be - * much of a performance hit, because it should be very infrequently executed. - */ -RF_AccessStripeMap_t * -rf_AllocASMList(count) - int count; -{ - RF_AccessStripeMap_t *p = NULL; - - RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *), count); - return (p); -} - -void -rf_FreeAccessStripeMapComponent(p) - RF_AccessStripeMap_t *p; -{ - RF_FREELIST_FREE(rf_asm_freelist, p, next); -} - -static void -rf_FreeASMList(l_start, l_end, count) - RF_AccessStripeMap_t *l_start, *l_end; - int count; -{ - RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next, (RF_AccessStripeMap_t *), count); -} - -void -rf_FreeAccessStripeMap(hdr) - RF_AccessStripeMapHeader_t *hdr; -{ - RF_AccessStripeMap_t *p, *pt = NULL; - RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL; - int count = 0, t, asm_count = 0; - - for (p = hdr->stripeMap; p; p = p->next) { - - /* link the 3 pda lists into the accumulating pda list */ - - if (!pdaList) - pdaList = p->qInfo; - else - pdaEnd->next = p->qInfo; - for (trailer = NULL, pdp = p->qInfo; pdp;) { - trailer = pdp; - pdp = pdp->next; - count++; - } - if (trailer) - pdaEnd = trailer; - - if (!pdaList) - pdaList = p->parityInfo; - else - pdaEnd->next = p->parityInfo; - for (trailer = NULL, pdp = p->parityInfo; pdp;) { - trailer = pdp; - pdp = pdp->next; - count++; - } - if (trailer) - pdaEnd = trailer; - - if (!pdaList) - pdaList = p->physInfo; - else - pdaEnd->next = p->physInfo; - for (trailer = NULL, pdp = p->physInfo; pdp;) { - trailer = pdp; - pdp = pdp->next; - count++; - } - if (trailer) - pdaEnd = trailer; - - pt = p; - asm_count++; - } - - /* debug only */ - for (t = 0, pdp = pdaList; pdp; pdp = pdp->next) - t++; - RF_ASSERT(t == count); - - if (pdaList) - rf_FreePDAList(pdaList, pdaEnd, count); - rf_FreeASMList(hdr->stripeMap, pt, asm_count); - rf_FreeAccessStripeMapHeader(hdr); -} -/* We can't use the large write optimization if there are any failures in the stripe. - * In the declustered layout, there is no way to immediately determine what disks - * constitute a stripe, so we actually have to hunt through the stripe looking for failures. - * The reason we map the parity instead of just using asm->parityInfo->col is because - * the latter may have been already redirected to a spare drive, which would - * mess up the computation of the stripe offset. - * - * ASSUMES AT MOST ONE FAILURE IN THE STRIPE. - */ -int -rf_CheckStripeForFailures(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; -{ - RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t stripeOffset; - int numFailures; - RF_RaidAddr_t sosAddr; - RF_SectorNum_t diskOffset, poffset; - RF_RowCol_t testrow; - - /* quick out in the fault-free case. */ - RF_LOCK_MUTEX(raidPtr->mutex); - numFailures = raidPtr->numFailures; - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (numFailures == 0) - return (0); - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - row = asmap->physInfo->row; - (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &testrow); - (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */ - - /* this need not be true if we've redirected the access to a spare in - * another row RF_ASSERT(row == testrow); */ - stripeOffset = 0; - for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) { - if (diskids[i] != pcol) { - if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) { - if (raidPtr->status[testrow] != rf_rs_reconstructing) - return (1); - RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]); - layoutPtr->map->MapSector(raidPtr, - sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit, - &trow, &tcol, &diskOffset, 0); - RF_ASSERT((trow == testrow) && (tcol == diskids[i])); - if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset)) - return (1); - asmap->flags |= RF_ASM_REDIR_LARGE_WRITE; - return (0); - } - stripeOffset++; - } - } - return (0); -} -/* - return the number of failed data units in the stripe. -*/ - -int -rf_NumFailedDataUnitsInStripe(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RowCol_t trow, tcol, row, i; - RF_SectorNum_t diskOffset; - RF_RaidAddr_t sosAddr; - int numFailures; - - /* quick out in the fault-free case. */ - RF_LOCK_MUTEX(raidPtr->mutex); - numFailures = raidPtr->numFailures; - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (numFailures == 0) - return (0); - numFailures = 0; - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - row = asmap->physInfo->row; - for (i = 0; i < layoutPtr->numDataCol; i++) { - (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit, - &trow, &tcol, &diskOffset, 0); - if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status)) - numFailures++; - } - - return numFailures; -} - - -/***************************************************************************************** - * - * debug routines - * - ****************************************************************************************/ - -void -rf_PrintAccessStripeMap(asm_h) - RF_AccessStripeMapHeader_t *asm_h; -{ - rf_PrintFullAccessStripeMap(asm_h, 0); -} - -void -rf_PrintFullAccessStripeMap(asm_h, prbuf) - RF_AccessStripeMapHeader_t *asm_h; - int prbuf; /* flag to print buffer pointers */ -{ - int i; - RF_AccessStripeMap_t *asmap = asm_h->stripeMap; - RF_PhysDiskAddr_t *p; - printf("%d stripes total\n", (int) asm_h->numStripes); - for (; asmap; asmap = asmap->next) { - /* printf("Num failures: %d\n",asmap->numDataFailed); */ - /* printf("Num sectors: - * %d\n",(int)asmap->totalSectorsAccessed); */ - printf("Stripe %d (%d sectors), failures: %d data, %d parity: ", - (int) asmap->stripeID, - (int) asmap->totalSectorsAccessed, - (int) asmap->numDataFailed, - (int) asmap->numParityFailed); - if (asmap->parityInfo) { - printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col, - (int) asmap->parityInfo->startSector, - (int) (asmap->parityInfo->startSector + - asmap->parityInfo->numSector - 1)); - if (prbuf) - printf(" b0x%lx", (unsigned long) asmap->parityInfo->bufPtr); - if (asmap->parityInfo->next) { - printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row, - asmap->parityInfo->next->col, - (int) asmap->parityInfo->next->startSector, - (int) (asmap->parityInfo->next->startSector + - asmap->parityInfo->next->numSector - 1)); - if (prbuf) - printf(" b0x%lx", (unsigned long) asmap->parityInfo->next->bufPtr); - RF_ASSERT(asmap->parityInfo->next->next == NULL); - } - printf("]\n\t"); - } - for (i = 0, p = asmap->physInfo; p; p = p->next, i++) { - printf("SU r%d c%d s%d-%d ", p->row, p->col, (int) p->startSector, - (int) (p->startSector + p->numSector - 1)); - if (prbuf) - printf("b0x%lx ", (unsigned long) p->bufPtr); - if (i && !(i & 1)) - printf("\n\t"); - } - printf("\n"); - p = asm_h->stripeMap->failedPDAs[0]; - if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) - printf("[multiple failures]\n"); - else - if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0) - printf("\t[Failed PDA: r%d c%d s%d-%d]\n", p->row, p->col, - (int) p->startSector, (int) (p->startSector + p->numSector - 1)); - } -} - -void -rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_SectorCount_t numBlocks; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - - printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t"); - for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += layoutPtr->sectorsPerStripeUnit) { - printf("%d (0x%x), ", (int) ra, (int) ra); - } - printf("\n"); - printf("Offset into stripe unit: %d (0x%x)\n", - (int) (raidAddr % layoutPtr->sectorsPerStripeUnit), - (int) (raidAddr % layoutPtr->sectorsPerStripeUnit)); -} -/* - given a parity descriptor and the starting address within a stripe, - range restrict the parity descriptor to touch only the correct stuff. -*/ -void -rf_ASMParityAdjust( - RF_PhysDiskAddr_t * toAdjust, - RF_StripeNum_t startAddrWithinStripe, - RF_SectorNum_t endAddress, - RF_RaidLayout_t * layoutPtr, - RF_AccessStripeMap_t * asm_p) -{ - RF_PhysDiskAddr_t *new_pda; - - /* when we're accessing only a portion of one stripe unit, we want the - * parity descriptor to identify only the chunk of parity associated - * with the data. When the access spans exactly one stripe unit - * boundary and is less than a stripe unit in size, it uses two - * disjoint regions of the parity unit. When an access spans more - * than one stripe unit boundary, it uses all of the parity unit. - * - * To better handle the case where stripe units are small, we may - * eventually want to change the 2nd case so that if the SU size is - * below some threshold, we just read/write the whole thing instead of - * breaking it up into two accesses. */ - if (asm_p->numStripeUnitsAccessed == 1) { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); - toAdjust->startSector += x; - toAdjust->raidAddress += x; - toAdjust->numSector = asm_p->physInfo->numSector; - RF_ASSERT(toAdjust->numSector != 0); - } else - if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); - - /* create a second pda and copy the parity map info - * into it */ - RF_ASSERT(toAdjust->next == NULL); - new_pda = toAdjust->next = rf_AllocPhysDiskAddr(); - *new_pda = *toAdjust; /* structure assignment */ - new_pda->next = NULL; - - /* adjust the start sector & number of blocks for the - * first parity pda */ - toAdjust->startSector += x; - toAdjust->raidAddress += x; - toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe; - RF_ASSERT(toAdjust->numSector != 0); - - /* adjust the second pda */ - new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress); - /* new_pda->raidAddress = - * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, - * toAdjust->raidAddress); */ - RF_ASSERT(new_pda->numSector != 0); - } -} -/* - Check if a disk has been spared or failed. If spared, - redirect the I/O. - If it has been failed, record it in the asm pointer. - Fourth arg is whether data or parity. -*/ -void -rf_ASMCheckStatus( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * pda_p, - RF_AccessStripeMap_t * asm_p, - RF_RaidDisk_t ** disks, - int parity) -{ - RF_DiskStatus_t dstatus; - RF_RowCol_t frow, fcol; - - dstatus = disks[pda_p->row][pda_p->col].status; - - if (dstatus == rf_ds_spared) { - /* if the disk has been spared, redirect access to the spare */ - frow = pda_p->row; - fcol = pda_p->col; - pda_p->row = disks[frow][fcol].spareRow; - pda_p->col = disks[frow][fcol].spareCol; - } else - if (dstatus == rf_ds_dist_spared) { - /* ditto if disk has been spared to dist spare space */ - RF_RowCol_t or = pda_p->row, oc = pda_p->col; - RF_SectorNum_t oo = pda_p->startSector; - - if (pda_p->type == RF_PDA_TYPE_DATA) - raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); - else - raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); - - if (rf_mapDebug) { - printf("Redirected r %d c %d o %d -> r%d c %d o %d\n", or, oc, (int) oo, - pda_p->row, pda_p->col, (int) pda_p->startSector); - } - } else - if (RF_DEAD_DISK(dstatus)) { - /* if the disk is inaccessible, mark the - * failure */ - if (parity) - asm_p->numParityFailed++; - else { - asm_p->numDataFailed++; -#if 0 - /* XXX Do we really want this spewing - * out on the console? GO */ - printf("DATA_FAILED!\n"); -#endif - } - asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p; - asm_p->numFailedPDAs++; -#if 0 - switch (asm_p->numParityFailed + asm_p->numDataFailed) { - case 1: - asm_p->failedPDAs[0] = pda_p; - break; - case 2: - asm_p->failedPDAs[1] = pda_p; - default: - break; - } -#endif - } - /* the redirected access should never span a stripe unit boundary */ - RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress) == - rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress + pda_p->numSector - 1)); - RF_ASSERT(pda_p->col != -1); -} diff --git a/sys/dev/raidframe/rf_map.h b/sys/dev/raidframe/rf_map.h deleted file mode 100644 index d7c6d19..0000000 --- a/sys/dev/raidframe/rf_map.h +++ /dev/null @@ -1,94 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_map.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_map.h */ - -#ifndef _RF__RF_MAP_H_ -#define _RF__RF_MAP_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_raid.h> - -/* mapping structure allocation and free routines */ -RF_AccessStripeMapHeader_t * -rf_MapAccess(RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, - caddr_t buffer, int remap); - -void -rf_MarkFailuresInASMList(RF_Raid_t * raidPtr, - RF_AccessStripeMapHeader_t * asm_h); - -RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t * asmap); - -RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t * pda); - -int rf_ConfigureMapModule(RF_ShutdownList_t ** listp); - -RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader(void); - -void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t * p); - -RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr(void); - -RF_PhysDiskAddr_t *rf_AllocPDAList(int count); - -void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t * p); - -RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent(void); - -RF_AccessStripeMap_t *rf_AllocASMList(int count); - -void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t * p); - -void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t * hdr); - -int rf_CheckStripeForFailures(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); - -int rf_NumFailedDataUnitsInStripe(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); - -void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h); - -void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h, int prbuf); - -void -rf_PrintRaidAddressInfo(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_SectorCount_t numBlocks); - -void -rf_ASMParityAdjust(RF_PhysDiskAddr_t * toAdjust, - RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress, - RF_RaidLayout_t * layoutPtr, RF_AccessStripeMap_t * asm_p); - -void -rf_ASMCheckStatus(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda_p, - RF_AccessStripeMap_t * asm_p, RF_RaidDisk_t ** disks, int parity); - -#endif /* !_RF__RF_MAP_H_ */ diff --git a/sys/dev/raidframe/rf_mcpair.c b/sys/dev/raidframe/rf_mcpair.c deleted file mode 100644 index 7b327ac..0000000 --- a/sys/dev/raidframe/rf_mcpair.c +++ /dev/null @@ -1,143 +0,0 @@ -/* $NetBSD: rf_mcpair.c,v 1.4 2000/09/11 02:23:14 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_mcpair.c - * an mcpair is a structure containing a mutex and a condition variable. - * it's used to block the current thread until some event occurs. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_shutdown.h> - -#include <sys/proc.h> - -static RF_FreeList_t *rf_mcpair_freelist; - -#define RF_MAX_FREE_MCPAIR 128 -#define RF_MCPAIR_INC 16 -#define RF_MCPAIR_INITIAL 24 - -static int init_mcpair(RF_MCPair_t *); -static void clean_mcpair(RF_MCPair_t *); -static void rf_ShutdownMCPair(void *); - - - -static int -init_mcpair(t) - RF_MCPair_t *t; -{ - int rc; - - rc = rf_mutex_init(&t->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - rc = rf_cond_init(&t->cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&t->mutex); - return (rc); - } - return (0); -} - -static void -clean_mcpair(t) - RF_MCPair_t *t; -{ - rf_mutex_destroy(&t->mutex); - rf_cond_destroy(&t->cond); -} - -static void -rf_ShutdownMCPair(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist, next, (RF_MCPair_t *), clean_mcpair); -} - -int -rf_ConfigureMCPair(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_mcpair_freelist, RF_MAX_FREE_MCPAIR, - RF_MCPAIR_INC, sizeof(RF_MCPair_t)); - rc = rf_ShutdownCreate(listp, rf_ShutdownMCPair, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownMCPair(NULL); - return (rc); - } - RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL, next, - (RF_MCPair_t *), init_mcpair); - return (0); -} - -RF_MCPair_t * -rf_AllocMCPair() -{ - RF_MCPair_t *t; - - RF_FREELIST_GET_INIT(rf_mcpair_freelist, t, next, (RF_MCPair_t *), init_mcpair); - if (t) { - t->flag = 0; - t->next = NULL; - } - return (t); -} - -void -rf_FreeMCPair(t) - RF_MCPair_t *t; -{ - RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist, t, next, clean_mcpair); -} -/* the callback function used to wake you up when you use an mcpair to wait for something */ -void -rf_MCPairWakeupFunc(mcpair) - RF_MCPair_t *mcpair; -{ - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 1; - wakeup(&(mcpair->cond)); - RF_UNLOCK_MUTEX(mcpair->mutex); -} diff --git a/sys/dev/raidframe/rf_mcpair.h b/sys/dev/raidframe/rf_mcpair.h deleted file mode 100644 index d43c728..0000000 --- a/sys/dev/raidframe/rf_mcpair.h +++ /dev/null @@ -1,54 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_mcpair.h,v 1.6 2000/09/21 01:45:46 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_mcpair.h - * see comments in rf_mcpair.c - */ - -#ifndef _RF__RF_MCPAIR_H_ -#define _RF__RF_MCPAIR_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -struct RF_MCPair_s { - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) - int flag; - RF_MCPair_t *next; -}; -#define RF_WAIT_MCPAIR(_mcp) \ - RF_LTSLEEP(&((_mcp)->cond), PRIBIO, "mcpair", 0, &((_mcp)->mutex)) - -int rf_ConfigureMCPair(RF_ShutdownList_t ** listp); -RF_MCPair_t *rf_AllocMCPair(void); -void rf_FreeMCPair(RF_MCPair_t * t); -void rf_MCPairWakeupFunc(RF_MCPair_t * t); - -#endif /* !_RF__RF_MCPAIR_H_ */ diff --git a/sys/dev/raidframe/rf_memchunk.c b/sys/dev/raidframe/rf_memchunk.c deleted file mode 100644 index b6e8bd9..0000000 --- a/sys/dev/raidframe/rf_memchunk.c +++ /dev/null @@ -1,213 +0,0 @@ -/* $NetBSD: rf_memchunk.c,v 1.4 1999/08/13 03:41:56 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/********************************************************************************* - * rf_memchunk.c - * - * experimental code. I've found that the malloc and free calls in the DAG - * creation code are very expensive. Since for any given workload the DAGs - * created for different accesses are likely to be similar to each other, the - * amount of memory used for any given DAG data structure is likely to be one - * of a small number of values. For example, in UNIX, all reads and writes will - * be less than 8k and will not span stripe unit boundaries. Thus in the absence - * of failure, the only DAGs that will ever get created are single-node reads - * and single-stripe-unit atomic read-modify-writes. So, I'm very likely to - * be continually asking for chunks of memory equal to the sizes of these two - * DAGs. - * - * This leads to the idea of holding on to these chunks of memory when the DAG is - * freed and then, when a new DAG is created, trying to find such a chunk before - * calling malloc. - * - * the "chunk list" is a list of lists. Each header node contains a size value - * and a pointer to a list of chunk descriptors, each of which holds a pointer - * to a chunk of memory of the indicated size. - * - * There is currently no way to purge memory out of the chunk list. My - * initial thought on this is to have a low-priority thread that wakes up every - * 1 or 2 seconds, purges all the chunks with low reuse counts, and sets all - * the reuse counts to zero. - * - * This whole idea may be bad, since malloc may be able to do this more efficiently. - * It's worth a try, though, and it can be turned off by setting useMemChunks to 0. - * - ********************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_shutdown.h> - -typedef struct RF_ChunkHdr_s RF_ChunkHdr_t; -struct RF_ChunkHdr_s { - int size; - RF_ChunkDesc_t *list; - RF_ChunkHdr_t *next; -}; - -static RF_ChunkHdr_t *chunklist, *chunk_hdr_free_list; -static RF_ChunkDesc_t *chunk_desc_free_list; -RF_DECLARE_STATIC_MUTEX(chunkmutex) - static void rf_ShutdownMemChunk(void *); - static RF_ChunkDesc_t *NewMemChunk(int, char *); - - - static void rf_ShutdownMemChunk(ignored) - void *ignored; -{ - RF_ChunkDesc_t *pt, *p; - RF_ChunkHdr_t *hdr, *ht; - - if (rf_memChunkDebug) - printf("Chunklist:\n"); - for (hdr = chunklist; hdr;) { - for (p = hdr->list; p;) { - if (rf_memChunkDebug) - printf("Size %d reuse count %d\n", p->size, p->reuse_count); - pt = p; - p = p->next; - RF_Free(pt->buf, pt->size); - RF_Free(pt, sizeof(*pt)); - } - ht = hdr; - hdr = hdr->next; - RF_Free(ht, sizeof(*ht)); - } - - rf_mutex_destroy(&chunkmutex); -} - -int -rf_ConfigureMemChunk(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - chunklist = NULL; - chunk_hdr_free_list = NULL; - chunk_desc_free_list = NULL; - rc = rf_mutex_init(&chunkmutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownMemChunk, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&chunkmutex); - } - return (rc); -} -/* called to get a chunk descriptor for a newly-allocated chunk of memory - * MUTEX MUST BE LOCKED - * - * free list is not currently used - */ -static RF_ChunkDesc_t * -NewMemChunk(size, buf) - int size; - char *buf; -{ - RF_ChunkDesc_t *p; - - if (chunk_desc_free_list) { - p = chunk_desc_free_list; - chunk_desc_free_list = p->next; - } else - RF_Malloc(p, sizeof(RF_ChunkDesc_t), (RF_ChunkDesc_t *)); - p->size = size; - p->buf = buf; - p->next = NULL; - p->reuse_count = 0; - return (p); -} -/* looks for a chunk of memory of acceptable size. If none, allocates one and returns - * a chunk descriptor for it, but does not install anything in the list. This is done - * when the chunk is released. - */ -RF_ChunkDesc_t * -rf_GetMemChunk(size) - int size; -{ - RF_ChunkHdr_t *hdr = chunklist; - RF_ChunkDesc_t *p = NULL; - char *buf; - - RF_LOCK_MUTEX(chunkmutex); - for (hdr = chunklist; hdr; hdr = hdr->next) - if (hdr->size >= size) { - p = hdr->list; - if (p) { - hdr->list = p->next; - p->next = NULL; - p->reuse_count++; - } - break; - } - if (!p) { - RF_Malloc(buf, size, (char *)); - p = NewMemChunk(size, buf); - } - RF_UNLOCK_MUTEX(chunkmutex); - (void) bzero(p->buf, size); - return (p); -} - -void -rf_ReleaseMemChunk(chunk) - RF_ChunkDesc_t *chunk; -{ - RF_ChunkHdr_t *hdr, *ht = NULL, *new; - - RF_LOCK_MUTEX(chunkmutex); - for (hdr = chunklist; hdr && hdr->size < chunk->size; ht = hdr, hdr = hdr->next); - if (hdr && hdr->size == chunk->size) { - chunk->next = hdr->list; - hdr->list = chunk; - } else { - RF_Malloc(new, sizeof(RF_ChunkHdr_t), (RF_ChunkHdr_t *)); - new->size = chunk->size; - new->list = chunk; - chunk->next = NULL; - if (ht) { - new->next = ht->next; - ht->next = new; - } else { - new->next = hdr; - chunklist = new; - } - } - RF_UNLOCK_MUTEX(chunkmutex); -} diff --git a/sys/dev/raidframe/rf_memchunk.h b/sys/dev/raidframe/rf_memchunk.h deleted file mode 100644 index 5806d20..0000000 --- a/sys/dev/raidframe/rf_memchunk.h +++ /dev/null @@ -1,48 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_memchunk.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for rf_memchunk.c. See comments there */ - -#ifndef _RF__RF_MEMCHUNK_H_ -#define _RF__RF_MEMCHUNK_H_ - -#include <dev/raidframe/rf_types.h> - -struct RF_ChunkDesc_s { - int size; - int reuse_count; - char *buf; - RF_ChunkDesc_t *next; -}; - -int rf_ConfigureMemChunk(RF_ShutdownList_t ** listp); -RF_ChunkDesc_t *rf_GetMemChunk(int size); -void rf_ReleaseMemChunk(RF_ChunkDesc_t * chunk); - -#endif /* !_RF__RF_MEMCHUNK_H_ */ diff --git a/sys/dev/raidframe/rf_nwayxor.c b/sys/dev/raidframe/rf_nwayxor.c deleted file mode 100644 index 170db6a..0000000 --- a/sys/dev/raidframe/rf_nwayxor.c +++ /dev/null @@ -1,451 +0,0 @@ -/* $NetBSD: rf_nwayxor.c,v 1.4 2000/03/30 12:45:41 augustss Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************ - * - * nwayxor.c -- code to do N-way xors for reconstruction - * - * nWayXorN xors N input buffers into the destination buffer. - * adapted from danner's longword_bxor code. - * - ************************************************************/ - -#include <dev/raidframe/rf_nwayxor.h> -#include <dev/raidframe/rf_shutdown.h> - -static int callcount[10]; -static void rf_ShutdownNWayXor(void *); - -static void -rf_ShutdownNWayXor(ignored) - void *ignored; -{ - int i; - - if (rf_showXorCallCounts == 0) - return; - printf("Call counts for n-way xor routines: "); - for (i = 0; i < 10; i++) - printf("%d ", callcount[i]); - printf("\n"); -} - -int -rf_ConfigureNWayXor(listp) - RF_ShutdownList_t **listp; -{ - int i, rc; - - for (i = 0; i < 10; i++) - callcount[i] = 0; - rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); - return (rc); -} - -void -rf_nWayXor1(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *src = (unsigned long *) src_rbs[0]->buffer; - unsigned long *dest = (unsigned long *) dest_rb->buffer; - unsigned long *end = src + len; - unsigned long d0, d1, d2, d3, s0, s1, s2, s3; - - callcount[1]++; - while (len >= 4) { - d0 = dest[0]; - d1 = dest[1]; - d2 = dest[2]; - d3 = dest[3]; - s0 = src[0]; - s1 = src[1]; - s2 = src[2]; - s3 = src[3]; - dest[0] = d0 ^ s0; - dest[1] = d1 ^ s1; - dest[2] = d2 ^ s2; - dest[3] = d3 ^ s3; - src += 4; - dest += 4; - len -= 4; - } - while (src < end) { - *dest++ ^= *src++; - } -} - -void -rf_nWayXor2(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *a = dst; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[2]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ = *a++ ^ *b++ ^ *c++; - len--; - } - while (len > 4) { - a0 = a[0]; - len -= 4; - - a1 = a[1]; - a2 = a[2]; - - a3 = a[3]; - a += 4; - - b0 = b[0]; - b1 = b[1]; - - b2 = b[2]; - b3 = b[3]; - /* start dual issue */ - a0 ^= b0; - b0 = c[0]; - - b += 4; - a1 ^= b1; - - a2 ^= b2; - a3 ^= b3; - - b1 = c[1]; - a0 ^= b0; - - b2 = c[2]; - a1 ^= b1; - - b3 = c[3]; - a2 ^= b2; - - dst[0] = a0; - a3 ^= b3; - dst[1] = a1; - c += 4; - dst[2] = a2; - dst[3] = a3; - dst += 4; - } - while (len) { - *dst++ = *a++ ^ *b++ ^ *c++; - len--; - } -} -/* note that first arg is not incremented but 2nd arg is */ -#define LOAD_FIRST(_dst,_b) \ - a0 = _dst[0]; len -= 4; \ - a1 = _dst[1]; \ - a2 = _dst[2]; \ - a3 = _dst[3]; \ - b0 = _b[0]; \ - b1 = _b[1]; \ - b2 = _b[2]; \ - b3 = _b[3]; _b += 4; - -/* note: arg is incremented */ -#define XOR_AND_LOAD_NEXT(_n) \ - a0 ^= b0; b0 = _n[0]; \ - a1 ^= b1; b1 = _n[1]; \ - a2 ^= b2; b2 = _n[2]; \ - a3 ^= b3; b3 = _n[3]; \ - _n += 4; - -/* arg is incremented */ -#define XOR_AND_STORE(_dst) \ - a0 ^= b0; _dst[0] = a0; \ - a1 ^= b1; _dst[1] = a1; \ - a2 ^= b2; _dst[2] = a2; \ - a3 ^= b3; _dst[3] = a3; \ - _dst += 4; - - -void -rf_nWayXor3(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[3]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++; - len--; - } -} - -void -rf_nWayXor4(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[4]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; - len--; - } -} - -void -rf_nWayXor5(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[5]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; - len--; - } -} - -void -rf_nWayXor6(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[6]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; - len--; - } -} - -void -rf_nWayXor7(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[7]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; - len--; - } -} - -void -rf_nWayXor8(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - unsigned long *i = (unsigned long *) src_rbs[7]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[8]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_LOAD_NEXT(i); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; - len--; - } -} - - -void -rf_nWayXor9(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; -{ - unsigned long *dst = (unsigned long *) dest_rb->buffer; - unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - unsigned long *i = (unsigned long *) src_rbs[7]->buffer; - unsigned long *j = (unsigned long *) src_rbs[8]->buffer; - unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - - callcount[9]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; - len--; - } - while (len > 4) { - LOAD_FIRST(dst, b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_LOAD_NEXT(i); - XOR_AND_LOAD_NEXT(j); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; - len--; - } -} diff --git a/sys/dev/raidframe/rf_nwayxor.h b/sys/dev/raidframe/rf_nwayxor.h deleted file mode 100644 index 1460d9b..0000000 --- a/sys/dev/raidframe/rf_nwayxor.h +++ /dev/null @@ -1,54 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_nwayxor.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ -/* - * rf_nwayxor.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * rf_nwayxor.h -- types and prototypes for nwayxor module - */ - -#ifndef _RF__RF_NWAYXOR_H_ -#define _RF__RF_NWAYXOR_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_reconstruct.h> - -int rf_ConfigureNWayXor(RF_ShutdownList_t ** listp); -void rf_nWayXor1(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor2(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor3(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor4(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor5(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor6(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor7(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor8(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor9(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); - -#endif /* !_RF__RF_NWAYXOR_H_ */ diff --git a/sys/dev/raidframe/rf_options.c b/sys/dev/raidframe/rf_options.c deleted file mode 100644 index 107c509..0000000 --- a/sys/dev/raidframe/rf_options.c +++ /dev/null @@ -1,78 +0,0 @@ -/* $NetBSD: rf_options.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_options.c - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - - -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> - -#ifdef RF_DBG_OPTION -#undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) long rf_##_option_ = _defval_; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) long rf_/**/_option_ = _defval_; -#endif /* __STDC__ */ - -#include <dev/raidframe/rf_optnames.h> - -#undef RF_DBG_OPTION - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_##_option_ }, -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_/**/_option_ }, -#endif /* __STDC__ */ - -RF_DebugName_t rf_debugNames[] = { -#include <dev/raidframe/rf_optnames.h> - {NULL, NULL} -}; -#undef RF_DBG_OPTION - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) rf_##_option_ = _defval_ ; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) rf_/**/_option_ = _defval_ ; -#endif /* __STDC__ */ - -void -rf_ResetDebugOptions() -{ -#include <dev/raidframe/rf_optnames.h> -} diff --git a/sys/dev/raidframe/rf_options.h b/sys/dev/raidframe/rf_options.h deleted file mode 100644 index 22b6341..0000000 --- a/sys/dev/raidframe/rf_options.h +++ /dev/null @@ -1,58 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_options.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ -/* - * rf_options.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_OPTIONS_H_ -#define _RF__RF_OPTIONS_H_ - -#define RF_DEFAULT_LOCK_TABLE_SIZE 256 - -typedef struct RF_DebugNames_s { - char *name; - long *ptr; -} RF_DebugName_t; - -extern RF_DebugName_t rf_debugNames[]; - -#ifdef RF_DBG_OPTION -#undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) extern long rf_##_option_; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) extern long rf_/**/_option_; -#endif /* __STDC__ */ -#include <dev/raidframe/rf_optnames.h> - -void rf_ResetDebugOptions(void); - -#endif /* !_RF__RF_OPTIONS_H_ */ diff --git a/sys/dev/raidframe/rf_optnames.h b/sys/dev/raidframe/rf_optnames.h deleted file mode 100644 index f04fbc1..0000000 --- a/sys/dev/raidframe/rf_optnames.h +++ /dev/null @@ -1,105 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_optnames.h,v 1.6 1999/12/07 02:54:08 oster Exp $ */ -/* - * rf_optnames.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Don't protect against multiple inclusion here- we actually want this. - */ - -RF_DBG_OPTION(accessDebug, 0) -RF_DBG_OPTION(accessTraceBufSize, 0) -RF_DBG_OPTION(cscanDebug, 0) /* debug CSCAN sorting */ -RF_DBG_OPTION(dagDebug, 0) -RF_DBG_OPTION(debugPrintUseBuffer, 0) -RF_DBG_OPTION(degDagDebug, 0) -RF_DBG_OPTION(disableAsyncAccs, 0) -RF_DBG_OPTION(diskDebug, 0) -RF_DBG_OPTION(enableAtomicRMW, 0) /* this debug var enables locking of - * the disk arm during small-write - * operations. Setting this variable - * to anything other than 0 will - * result in deadlock. (wvcii) */ -RF_DBG_OPTION(engineDebug, 0) -RF_DBG_OPTION(fifoDebug, 0) /* debug fifo queueing */ -RF_DBG_OPTION(floatingRbufDebug, 0) -RF_DBG_OPTION(forceHeadSepLimit, -1) -RF_DBG_OPTION(forceNumFloatingReconBufs, -1) /* wire down number of - * extra recon buffers - * to use */ -RF_DBG_OPTION(keepAccTotals, 0) /* turn on keep_acc_totals */ -RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE) -RF_DBG_OPTION(mapDebug, 0) -RF_DBG_OPTION(maxNumTraces, -1) - -RF_DBG_OPTION(memChunkDebug, 0) -RF_DBG_OPTION(memDebug, 0) -RF_DBG_OPTION(memDebugAddress, 0) -RF_DBG_OPTION(numBufsToAccumulate, 1) /* number of buffers to - * accumulate before doing XOR */ -RF_DBG_OPTION(prReconSched, 0) -RF_DBG_OPTION(printDAGsDebug, 0) -RF_DBG_OPTION(printStatesDebug, 0) -RF_DBG_OPTION(protectedSectors, 64L) /* # of sectors at start of - * disk to exclude from RAID - * address space */ -RF_DBG_OPTION(pssDebug, 0) -RF_DBG_OPTION(queueDebug, 0) -RF_DBG_OPTION(quiesceDebug, 0) -RF_DBG_OPTION(raidSectorOffset, 0) /* added to all incoming sectors to - * debug alignment problems */ -RF_DBG_OPTION(reconDebug, 0) -RF_DBG_OPTION(reconbufferDebug, 0) -RF_DBG_OPTION(scanDebug, 0) /* debug SCAN sorting */ -RF_DBG_OPTION(showXorCallCounts, 0) /* show n-way Xor call counts */ -RF_DBG_OPTION(shutdownDebug, 0) /* show shutdown calls */ -RF_DBG_OPTION(sizePercentage, 100) -RF_DBG_OPTION(sstfDebug, 0) /* turn on debugging info for sstf queueing */ -RF_DBG_OPTION(stripeLockDebug, 0) -RF_DBG_OPTION(suppressLocksAndLargeWrites, 0) -RF_DBG_OPTION(suppressTraceDelays, 0) -RF_DBG_OPTION(useMemChunks, 1) -RF_DBG_OPTION(validateDAGDebug, 0) -RF_DBG_OPTION(validateVisitedDebug, 1) /* XXX turn to zero by - * default? */ -RF_DBG_OPTION(verifyParityDebug, 0) -RF_DBG_OPTION(debugKernelAccess, 0) /* DoAccessKernel debugging */ - -#if RF_INCLUDE_PARITYLOGGING > 0 -RF_DBG_OPTION(forceParityLogReint, 0) -RF_DBG_OPTION(numParityRegions, 0) /* number of regions in the array */ -RF_DBG_OPTION(numReintegrationThreads, 1) -RF_DBG_OPTION(parityLogDebug, 0) /* if nonzero, enables debugging of - * parity logging */ -RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024) /* target bytes - * available for in-core - * logs */ -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - diff --git a/sys/dev/raidframe/rf_paritylog.c b/sys/dev/raidframe/rf_paritylog.c deleted file mode 100644 index 87c33e6..0000000 --- a/sys/dev/raidframe/rf_paritylog.c +++ /dev/null @@ -1,871 +0,0 @@ -/* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* Code for manipulating in-core parity logs - * - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -/* - * Append-only log for recording parity "update" and "overwrite" records - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_paritylogging.h> -#include <dev/raidframe/rf_paritylogDiskMgr.h> - -static RF_CommonLogData_t * -AllocParityLogCommonData(RF_Raid_t * raidPtr) -{ - RF_CommonLogData_t *common = NULL; - int rc; - - /* Return a struct for holding common parity log information from the - * free list (rf_parityLogDiskQueue.freeCommonList). If the free list - * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (raidPtr->parityLogDiskQueue.freeCommonList) { - common = raidPtr->parityLogDiskQueue.freeCommonList; - raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } else { - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); - rc = rf_mutex_init(&common->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_Free(common, sizeof(RF_CommonLogData_t)); - common = NULL; - } - } - common->next = NULL; - return (common); -} - -static void -FreeParityLogCommonData(RF_CommonLogData_t * common) -{ - RF_Raid_t *raidPtr; - - /* Insert a single struct for holding parity log information (data) - * into the free list (rf_parityLogDiskQueue.freeCommonList). - * NON-BLOCKING */ - - raidPtr = common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - common->next = raidPtr->parityLogDiskQueue.freeCommonList; - raidPtr->parityLogDiskQueue.freeCommonList = common; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -static RF_ParityLogData_t * -AllocParityLogData(RF_Raid_t * raidPtr) -{ - RF_ParityLogData_t *data = NULL; - - /* Return a struct for holding parity log information from the free - * list (rf_parityLogDiskQueue.freeList). If the free list is empty, - * call RF_Malloc to create a new structure. NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (raidPtr->parityLogDiskQueue.freeDataList) { - data = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } else { - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); - } - data->next = NULL; - data->prev = NULL; - return (data); -} - - -static void -FreeParityLogData(RF_ParityLogData_t * data) -{ - RF_ParityLogData_t *nextItem; - RF_Raid_t *raidPtr; - - /* Insert a linked list of structs for holding parity log information - * (data) into the free list (parityLogDiskQueue.freeList). - * NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while (data) { - nextItem = data->next; - data->next = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = data; - data = nextItem; - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - - -static void -EnqueueParityLogData( - RF_ParityLogData_t * data, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) -{ - RF_Raid_t *raidPtr; - - /* Insert an in-core parity log (*data) into the head of a disk queue - * (*head, *tail). NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - if (rf_parityLogDebug) - printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); - RF_ASSERT(data->prev == NULL); - RF_ASSERT(data->next == NULL); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (*head) { - /* insert into head of queue */ - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - data->next = *head; - (*head)->prev = data; - *head = data; - } else { - /* insert into empty list */ - RF_ASSERT(*head == NULL); - RF_ASSERT(*tail == NULL); - *head = data; - *tail = data; - } - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -static RF_ParityLogData_t * -DequeueParityLogData( - RF_Raid_t * raidPtr, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, - int ignoreLocks) -{ - RF_ParityLogData_t *data; - - /* Remove and return an in-core parity log from the tail of a disk - * queue (*head, *tail). NON-BLOCKING */ - - /* remove from tail, preserving FIFO order */ - if (!ignoreLocks) - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - data = *tail; - if (data) { - if (*head == *tail) { - /* removing last item from queue */ - *head = NULL; - *tail = NULL; - } else { - *tail = (*tail)->prev; - (*tail)->next = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - data->next = NULL; - data->prev = NULL; - if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); - } - if (*head) { - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - if (!ignoreLocks) - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - return (data); -} - - -static void -RequeueParityLogData( - RF_ParityLogData_t * data, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) -{ - RF_Raid_t *raidPtr; - - /* Insert an in-core parity log (*data) into the tail of a disk queue - * (*head, *tail). NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - RF_ASSERT(data); - if (rf_parityLogDebug) - printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (*tail) { - /* append to tail of list */ - data->prev = *tail; - data->next = NULL; - (*tail)->next = data; - *tail = data; - } else { - /* inserting into an empty list */ - *head = data; - *tail = data; - (*head)->prev = NULL; - (*tail)->next = NULL; - } - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -RF_ParityLogData_t * -rf_CreateParityLogData( - RF_ParityRecordType_t operation, - RF_PhysDiskAddr_t * pda, - caddr_t bufPtr, - RF_Raid_t * raidPtr, - int (*wakeFunc) (RF_DagNode_t * node, int status), - void *wakeArg, - RF_AccTraceEntry_t * tracerec, - RF_Etimer_t startTime) -{ - RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; - RF_CommonLogData_t *common; - RF_PhysDiskAddr_t *diskAddress; - int boundary, offset = 0; - - /* Return an initialized struct of info to be logged. Build one item - * per physical disk address, one item per region. - * - * NON-BLOCKING */ - - diskAddress = pda; - common = AllocParityLogCommonData(raidPtr); - RF_ASSERT(common); - - common->operation = operation; - common->bufPtr = bufPtr; - common->raidPtr = raidPtr; - common->wakeFunc = wakeFunc; - common->wakeArg = wakeArg; - common->tracerec = tracerec; - common->startTime = startTime; - common->cnt = 0; - - if (rf_parityLogDebug) - printf("[entering CreateParityLogData]\n"); - while (diskAddress) { - common->cnt++; - data = AllocParityLogData(raidPtr); - RF_ASSERT(data); - data->common = common; - data->next = NULL; - data->prev = NULL; - data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); - if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { - /* disk address does not cross a region boundary */ - data->diskAddress = *diskAddress; - data->bufOffset = offset; - offset = offset + diskAddress->numSector; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ - diskAddress = diskAddress->next; - } else { - /* disk address crosses a region boundary */ - /* find address where region is crossed */ - boundary = 0; - while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) - boundary++; - - /* enter data before the boundary */ - data->diskAddress = *diskAddress; - data->diskAddress.numSector = boundary; - data->bufOffset = offset; - offset += boundary; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ - diskAddress->startSector += boundary; - diskAddress->numSector -= boundary; - } - } - if (rf_parityLogDebug) - printf("[leaving CreateParityLogData]\n"); - return (resultHead); -} - - -RF_ParityLogData_t * -rf_SearchAndDequeueParityLogData( - RF_Raid_t * raidPtr, - int regionID, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, - int ignoreLocks) -{ - RF_ParityLogData_t *w; - - /* Remove and return an in-core parity log from a specified region - * (regionID). If a matching log is not found, return NULL. - * - * NON-BLOCKING. */ - - /* walk backward through a list, looking for an entry with a matching - * region ID */ - if (!ignoreLocks) - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - w = (*tail); - while (w) { - if (w->regionID == regionID) { - /* remove an element from the list */ - if (w == *tail) { - if (*head == *tail) { - /* removing only element in the list */ - *head = NULL; - *tail = NULL; - } else { - /* removing last item in the list */ - *tail = (*tail)->prev; - (*tail)->next = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - } else { - if (w == *head) { - /* removing first item in the list */ - *head = (*head)->next; - (*head)->prev = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } else { - /* removing an item from the middle of - * the list */ - w->prev->next = w->next; - w->next->prev = w->prev; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - } - w->prev = NULL; - w->next = NULL; - if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); - return (w); - } else - w = w->prev; - } - if (!ignoreLocks) - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - return (NULL); -} - -static RF_ParityLogData_t * -DequeueMatchingLogData( - RF_Raid_t * raidPtr, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) -{ - RF_ParityLogData_t *logDataList, *logData; - int regionID; - - /* Remove and return an in-core parity log from the tail of a disk - * queue (*head, *tail). Then remove all matching (identical - * regionIDs) logData and return as a linked list. - * - * NON-BLOCKING */ - - logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); - if (logDataList) { - regionID = logDataList->regionID; - logData = logDataList; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); - while (logData->next) { - logData = logData->next; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); - } - } - return (logDataList); -} - - -static RF_ParityLog_t * -AcquireParityLog( - RF_ParityLogData_t * logData, - int finish) -{ - RF_ParityLog_t *log = NULL; - RF_Raid_t *raidPtr; - - /* Grab a log buffer from the pool and return it. If no buffers are - * available, return NULL. NON-BLOCKING */ - raidPtr = logData->common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - if (raidPtr->parityLogPool.parityLogs) { - log = raidPtr->parityLogPool.parityLogs; - raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; - log->regionID = logData->regionID; - log->numRecords = 0; - log->next = NULL; - raidPtr->logsInUse++; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - } else { - /* no logs available, so place ourselves on the queue of work - * waiting on log buffers this is done while - * parityLogPool.mutex is held, to ensure synchronization with - * ReleaseParityLogs. */ - if (rf_parityLogDebug) - printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); - if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - return (log); -} - -void -rf_ReleaseParityLogs( - RF_Raid_t * raidPtr, - RF_ParityLog_t * firstLog) -{ - RF_ParityLogData_t *logDataList; - RF_ParityLog_t *log, *lastLog; - int cnt; - - /* Insert a linked list of parity logs (firstLog) to the free list - * (parityLogPool.parityLogPool) - * - * NON-BLOCKING. */ - - RF_ASSERT(firstLog); - - /* Before returning logs to global free list, service all requests - * which are blocked on logs. Holding mutexes for parityLogPool and - * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - log = firstLog; - if (firstLog) - firstLog = firstLog->next; - log->numRecords = 0; - log->next = NULL; - while (logDataList && log) { - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); - if (rf_parityLogDebug) - printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); - if (log == NULL) { - log = firstLog; - if (firstLog) { - firstLog = firstLog->next; - log->numRecords = 0; - log->next = NULL; - } - } - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (log) - logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - } - /* return remaining logs to pool */ - if (log) { - log->next = firstLog; - firstLog = log; - } - if (firstLog) { - lastLog = firstLog; - raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - while (lastLog->next) { - lastLog = lastLog->next; - raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - } - lastLog->next = raidPtr->parityLogPool.parityLogs; - raidPtr->parityLogPool.parityLogs = firstLog; - cnt = 0; - log = raidPtr->parityLogPool.parityLogs; - while (log) { - cnt++; - log = log->next; - } - RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -} - -static void -ReintLog( - RF_Raid_t * raidPtr, - int regionID, - RF_ParityLog_t * log) -{ - RF_ASSERT(log); - - /* Insert an in-core parity log (log) into the disk queue of - * reintegration work. Set the flag (reintInProgress) for the - * specified region (regionID) to indicate that reintegration is in - * progress for this region. NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint - * complete */ - - if (rf_parityLogDebug) - printf("[requesting reintegration of region %d]\n", log->regionID); - /* move record to reintegration queue */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - log->next = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = log; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); -} - -static void -FlushLog( - RF_Raid_t * raidPtr, - RF_ParityLog_t * log) -{ - /* insert a core log (log) into a list of logs - * (parityLogDiskQueue.flushQueue) waiting to be written to disk. - * NON-BLOCKING */ - - RF_ASSERT(log); - RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); - RF_ASSERT(log->next == NULL); - /* move log to flush queue */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - log->next = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = log; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); -} - -static int -DumpParityLogToDisk( - int finish, - RF_ParityLogData_t * logData) -{ - int i, diskCount, regionID = logData->regionID; - RF_ParityLog_t *log; - RF_Raid_t *raidPtr; - - raidPtr = logData->common->raidPtr; - - /* Move a core log to disk. If the log disk is full, initiate - * reintegration. - * - * Return (0) if we can enqueue the dump immediately, otherwise return - * (1) to indicate we are blocked on reintegration and control of the - * thread should be relinquished. - * - * Caller must hold regionInfo[regionID].mutex - * - * NON-BLOCKING */ - - if (rf_parityLogDebug) - printf("[dumping parity log to disk, region %d]\n", regionID); - log = raidPtr->regionInfo[regionID].coreLog; - RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); - RF_ASSERT(log->next == NULL); - - /* if reintegration is in progress, must queue work */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - if (raidPtr->regionInfo[regionID].reintInProgress) { - /* Can not proceed since this region is currently being - * reintegrated. We can not block, so queue remaining work and - * return */ - if (rf_parityLogDebug) - printf("[region %d waiting on reintegration]\n", regionID); - /* XXX not sure about the use of finish - shouldn't this - * always be "Enqueue"? */ - if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); - else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - return (1); /* relenquish control of this thread */ - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - raidPtr->regionInfo[regionID].coreLog = NULL; - if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) - /* IMPORTANT!! this loop bound assumes region disk holds an - * integral number of core logs */ - { - /* update disk map for this region */ - diskCount = raidPtr->regionInfo[regionID].diskCount; - for (i = 0; i < raidPtr->numSectorsPerLog; i++) { - raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; - raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; - } - log->diskOffset = diskCount; - raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; - FlushLog(raidPtr, log); - } else { - /* no room for log on disk, send it to disk manager and - * request reintegration */ - RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); - ReintLog(raidPtr, regionID, log); - } - if (rf_parityLogDebug) - printf("[finished dumping parity log to disk, region %d]\n", regionID); - return (0); -} - -int -rf_ParityLogAppend( - RF_ParityLogData_t * logData, - int finish, - RF_ParityLog_t ** incomingLog, - int clearReintFlag) -{ - int regionID, logItem, itemDone; - RF_ParityLogData_t *item; - int punt, done = RF_FALSE; - RF_ParityLog_t *log; - RF_Raid_t *raidPtr; - RF_Etimer_t timer; - int (*wakeFunc) (RF_DagNode_t * node, int status); - void *wakeArg; - - /* Add parity to the appropriate log, one sector at a time. This - * routine is called is called by dag functions ParityLogUpdateFunc - * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. - * - * Parity to be logged is contained in a linked-list (logData). When - * this routine returns, every sector in the list will be in one of - * three places: 1) entered into the parity log 2) queued, waiting on - * reintegration 3) queued, waiting on a core log - * - * Blocked work is passed to the ParityLoggingDiskManager for completion. - * Later, as conditions which required the block are removed, the work - * reenters this routine with the "finish" parameter set to "RF_TRUE." - * - * NON-BLOCKING */ - - RF_ASSERT(logData != NULL); - raidPtr = logData->common->raidPtr; - /* lock the region for the first item in logData */ - regionID = logData->regionID; - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); - - if (clearReintFlag) { - /* Enable flushing for this region. Holding both locks - * provides a synchronization barrier with DumpParityLogToDisk */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); - raidPtr->regionInfo[regionID].diskCount = 0; - raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now - * enabled */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - /* process each item in logData */ - while (logData) { - /* remove an item from logData */ - item = logData; - logData = logData->next; - item->next = NULL; - item->prev = NULL; - - if (rf_parityLogDebug) - printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); - - /* see if we moved to a new region */ - if (regionID != item->regionID) { - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - regionID = item->regionID; - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); - } - punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This - * can happen in one of two ways: 1) no core - * log (AcquireParityLog) 2) waiting on - * reintegration (DumpParityLogToDisk) If punt - * is RF_TRUE, the dataItem was queued, so - * skip to next item. */ - - /* process item, one sector at a time, until all sectors - * processed or we punt */ - if (item->diskAddress.numSector > 0) - done = RF_FALSE; - else - RF_ASSERT(0); - while (!punt && !done) { - /* verify that a core log exists for this region */ - if (!raidPtr->regionInfo[regionID].coreLog) { - /* Attempt to acquire a parity log. If - * acquisition fails, queue remaining work in - * data item and move to nextItem. */ - if (incomingLog) - if (*incomingLog) { - RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; - *incomingLog = NULL; - } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* Note: AcquireParityLog either returns a log - * or enqueues currentItem */ - } - if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* failed to find a core log */ - else { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* verify that the log has room for new - * entries */ - /* if log is full, dump it to disk and grab a - * new log */ - if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { - /* log is full, dump it to disk */ - if (DumpParityLogToDisk(finish, item)) - punt = RF_TRUE; /* dump unsuccessful, - * blocked on - * reintegration */ - else { - /* dump was successful */ - if (incomingLog) - if (*incomingLog) { - RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; - *incomingLog = NULL; - } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* if a core log is not - * available, must queue work - * and return */ - if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* blocked on log - * availability */ - } - } - } - /* if we didn't punt on this item, attempt to add a - * sector to the core log */ - if (!punt) { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* at this point, we have a core log with - * enough room for a sector */ - /* copy a sector into the log */ - log = raidPtr->regionInfo[regionID].coreLog; - RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); - logItem = log->numRecords++; - log->records[logItem].parityAddr = item->diskAddress; - RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); - log->records[logItem].parityAddr.numSector = 1; - log->records[logItem].operation = item->common->operation; - bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector)); - item->diskAddress.numSector--; - item->diskAddress.startSector++; - if (item->diskAddress.numSector == 0) - done = RF_TRUE; - } - } - - if (!punt) { - /* Processed this item completely, decrement count of - * items to be processed. */ - RF_ASSERT(item->diskAddress.numSector == 0); - RF_LOCK_MUTEX(item->common->mutex); - item->common->cnt--; - if (item->common->cnt == 0) - itemDone = RF_TRUE; - else - itemDone = RF_FALSE; - RF_UNLOCK_MUTEX(item->common->mutex); - if (itemDone) { - /* Finished processing all log data for this - * IO Return structs to free list and invoke - * wakeup function. */ - timer = item->common->startTime; /* grab initial value of - * timer */ - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); - if (rf_parityLogDebug) - printf("[waking process for region %d]\n", item->regionID); - wakeFunc = item->common->wakeFunc; - wakeArg = item->common->wakeArg; - FreeParityLogCommonData(item->common); - FreeParityLogData(item); - (wakeFunc) (wakeArg, 0); - } else - FreeParityLogData(item); - } - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - if (rf_parityLogDebug) - printf("[exiting ParityLogAppend]\n"); - return (0); -} - - -void -rf_EnableParityLogging(RF_Raid_t * raidPtr) -{ - int regionID; - - for (regionID = 0; regionID < rf_numParityRegions; regionID++) { - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - } - if (rf_parityLogDebug) - printf("[parity logging enabled]\n"); -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylog.h b/sys/dev/raidframe/rf_paritylog.h deleted file mode 100644 index 1f2b80d..0000000 --- a/sys/dev/raidframe/rf_paritylog.h +++ /dev/null @@ -1,181 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_paritylog.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for parity log - * - */ - -#ifndef _RF__RF_PARITYLOG_H_ -#define _RF__RF_PARITYLOG_H_ - -#include <dev/raidframe/rf_types.h> - -#define RF_DEFAULT_NUM_SECTORS_PER_LOG 64 - -typedef int RF_RegionId_t; - -typedef enum RF_ParityRecordType_e { - RF_STOP, - RF_UPDATE, - RF_OVERWRITE -} RF_ParityRecordType_t; - -struct RF_CommonLogData_s { - RF_DECLARE_MUTEX(mutex) /* protects cnt */ - int cnt; /* when 0, time to call wakeFunc */ - RF_Raid_t *raidPtr; -/* int (*wakeFunc)(RF_Buf_t); */ - int (*wakeFunc) (RF_DagNode_t * node, int status); - void *wakeArg; - RF_AccTraceEntry_t *tracerec; - RF_Etimer_t startTime; - caddr_t bufPtr; - RF_ParityRecordType_t operation; - RF_CommonLogData_t *next; -}; - -struct RF_ParityLogData_s { - RF_RegionId_t regionID; /* this struct guaranteed to span a single - * region */ - int bufOffset; /* offset from common->bufPtr */ - RF_PhysDiskAddr_t diskAddress; - RF_CommonLogData_t *common; /* info shared by one or more - * parityLogData structs */ - RF_ParityLogData_t *next; - RF_ParityLogData_t *prev; -}; - -struct RF_ParityLogAppendQueue_s { - RF_DECLARE_MUTEX(mutex) -}; - -struct RF_ParityLogRecord_s { - RF_PhysDiskAddr_t parityAddr; - RF_ParityRecordType_t operation; -}; - -struct RF_ParityLog_s { - RF_RegionId_t regionID; - int numRecords; - int diskOffset; - RF_ParityLogRecord_t *records; - caddr_t bufPtr; - RF_ParityLog_t *next; -}; - -struct RF_ParityLogQueue_s { - RF_DECLARE_MUTEX(mutex) - RF_ParityLog_t *parityLogs; -}; - -struct RF_RegionBufferQueue_s { - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) - int bufferSize; - int totalBuffers; /* size of array 'buffers' */ - int availableBuffers; /* num available 'buffers' */ - int emptyBuffersIndex; /* stick next freed buffer here */ - int availBuffersIndex; /* grab next buffer from here */ - caddr_t *buffers; /* array buffers used to hold parity */ -}; -#define RF_PLOG_CREATED (1<<0)/* thread is created */ -#define RF_PLOG_RUNNING (1<<1)/* thread is running */ -#define RF_PLOG_TERMINATE (1<<2)/* thread is terminated (should exit) */ -#define RF_PLOG_SHUTDOWN (1<<3)/* thread is aware and exiting/exited */ - -struct RF_ParityLogDiskQueue_s { - RF_DECLARE_MUTEX(mutex) /* protects all vars in this struct */ - RF_DECLARE_COND(cond) - int threadState; /* is thread running, should it shutdown (see - * above) */ - RF_ParityLog_t *flushQueue; /* list of parity logs to be flushed - * to log disk */ - RF_ParityLog_t *reintQueue; /* list of parity logs waiting to be - * reintegrated */ - RF_ParityLogData_t *bufHead; /* head of FIFO list of log data, - * waiting on a buffer */ - RF_ParityLogData_t *bufTail; /* tail of FIFO list of log data, - * waiting on a buffer */ - RF_ParityLogData_t *reintHead; /* head of FIFO list of log data, - * waiting on reintegration */ - RF_ParityLogData_t *reintTail; /* tail of FIFO list of log data, - * waiting on reintegration */ - RF_ParityLogData_t *logBlockHead; /* queue of work, blocked - * until a log is available */ - RF_ParityLogData_t *logBlockTail; - RF_ParityLogData_t *reintBlockHead; /* queue of work, blocked - * until reintegration is - * complete */ - RF_ParityLogData_t *reintBlockTail; - RF_CommonLogData_t *freeCommonList; /* list of unused common data - * structs */ - RF_ParityLogData_t *freeDataList; /* list of unused log data - * structs */ -}; - -struct RF_DiskMap_s { - RF_PhysDiskAddr_t parityAddr; - RF_ParityRecordType_t operation; -}; - -struct RF_RegionInfo_s { - RF_DECLARE_MUTEX(mutex) /* protects: diskCount, diskMap, - * loggingEnabled, coreLog */ - RF_DECLARE_MUTEX(reintMutex) /* protects: reintInProgress */ - int reintInProgress;/* flag used to suspend flushing operations */ - RF_SectorCount_t capacity; /* capacity of this region in sectors */ - RF_SectorNum_t regionStartAddr; /* starting disk address for this - * region */ - RF_SectorNum_t parityStartAddr; /* starting disk address for this - * region */ - RF_SectorCount_t numSectorsParity; /* number of parity sectors - * protected by this region */ - RF_SectorCount_t diskCount; /* num of sectors written to this - * region's disk log */ - RF_DiskMap_t *diskMap; /* in-core map of what's in this region's disk - * log */ - int loggingEnabled; /* logging enable for this region */ - RF_ParityLog_t *coreLog;/* in-core log for this region */ -}; - -RF_ParityLogData_t * -rf_CreateParityLogData(RF_ParityRecordType_t operation, - RF_PhysDiskAddr_t * pda, caddr_t bufPtr, RF_Raid_t * raidPtr, - int (*wakeFunc) (RF_DagNode_t * node, int status), - void *wakeArg, RF_AccTraceEntry_t * tracerec, - RF_Etimer_t startTime); - RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t * raidPtr, - RF_RegionId_t regionID, RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, int ignoreLocks); - void rf_ReleaseParityLogs(RF_Raid_t * raidPtr, RF_ParityLog_t * firstLog); - int rf_ParityLogAppend(RF_ParityLogData_t * logData, int finish, - RF_ParityLog_t ** incomingLog, int clearReintFlag); - void rf_EnableParityLogging(RF_Raid_t * raidPtr); - -#endif /* !_RF__RF_PARITYLOG_H_ */ diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.c b/sys/dev/raidframe/rf_paritylogDiskMgr.c deleted file mode 100644 index 5d864e2..0000000 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.c +++ /dev/null @@ -1,703 +0,0 @@ -/* $NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* Code for flushing and reintegration operations related to parity logging. - * - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_paritylogging.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_kintf.h> - -#include <dev/raidframe/rf_paritylogDiskMgr.h> - -static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *); - -static caddr_t -AcquireReintBuffer(pool) - RF_RegionBufferQueue_t *pool; -{ - caddr_t bufPtr = NULL; - - /* Return a region buffer from the free list (pool). If the free list - * is empty, WAIT. BLOCKING */ - - RF_LOCK_MUTEX(pool->mutex); - if (pool->availableBuffers > 0) { - bufPtr = pool->buffers[pool->availBuffersIndex]; - pool->availableBuffers--; - pool->availBuffersIndex++; - if (pool->availBuffersIndex == pool->totalBuffers) - pool->availBuffersIndex = 0; - RF_UNLOCK_MUTEX(pool->mutex); - } else { - RF_PANIC(); /* should never happen in correct config, - * single reint */ - RF_WAIT_COND(pool->cond, pool->mutex); - } - return (bufPtr); -} - -static void -ReleaseReintBuffer( - RF_RegionBufferQueue_t * pool, - caddr_t bufPtr) -{ - /* Insert a region buffer (bufPtr) into the free list (pool). - * NON-BLOCKING */ - - RF_LOCK_MUTEX(pool->mutex); - pool->availableBuffers++; - pool->buffers[pool->emptyBuffersIndex] = bufPtr; - pool->emptyBuffersIndex++; - if (pool->emptyBuffersIndex == pool->totalBuffers) - pool->emptyBuffersIndex = 0; - RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); - RF_UNLOCK_MUTEX(pool->mutex); - RF_SIGNAL_COND(pool->cond); -} - - - -static void -ReadRegionLog( - RF_RegionId_t regionID, - RF_MCPair_t * rrd_mcpair, - caddr_t regionBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** rrd_dag_h, - RF_AllocListElem_t ** rrd_alloclist, - RF_PhysDiskAddr_t ** rrd_pda) -{ - /* Initiate the read a region log from disk. Once initiated, return - * to the calling routine. - * - * NON-BLOCKING */ - - RF_AccTraceEntry_t *tracerec; - RF_DagNode_t *rrd_rdNode; - - /* create DAG to read region log from disk */ - rf_MakeAllocList(*rrd_alloclist); - *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, - rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rrl", *rrd_alloclist, - RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for the core log */ - /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *rrd_pda = rf_AllocPDAList(1); - rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), - &((*rrd_pda)->col), &((*rrd_pda)->startSector)); - (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; - - if ((*rrd_pda)->next) { - (*rrd_pda)->next = NULL; - printf("set rrd_pda->next to NULL\n"); - } - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*rrd_dag_h)->tracerec = tracerec; - rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; - rrd_rdNode->params[0].p = *rrd_pda; -/* rrd_rdNode->params[1] = regionBuffer; */ - rrd_rdNode->params[2].v = 0; - rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - - /* launch region log read dag */ - rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) rrd_mcpair); -} - - - -static void -WriteCoreLog( - RF_ParityLog_t * log, - RF_MCPair_t * fwr_mcpair, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** fwr_dag_h, - RF_AllocListElem_t ** fwr_alloclist, - RF_PhysDiskAddr_t ** fwr_pda) -{ - RF_RegionId_t regionID = log->regionID; - RF_AccTraceEntry_t *tracerec; - RF_SectorNum_t regionOffset; - RF_DagNode_t *fwr_wrNode; - - /* Initiate the write of a core log to a region log disk. Once - * initiated, return to the calling routine. - * - * NON-BLOCKING */ - - /* create DAG to write a core log to a region log disk */ - rf_MakeAllocList(*fwr_alloclist); - *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for the region log */ - /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *fwr_pda = rf_AllocPDAList(1); - regionOffset = log->diskOffset; - rf_MapLogParityLogging(raidPtr, regionID, regionOffset, - &((*fwr_pda)->row), &((*fwr_pda)->col), - &((*fwr_pda)->startSector)); - (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; - - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*fwr_dag_h)->tracerec = tracerec; - fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; - fwr_wrNode->params[0].p = *fwr_pda; -/* fwr_wrNode->params[1] = log->bufPtr; */ - fwr_wrNode->params[2].v = 0; - fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - - /* launch the dag to write the core log to disk */ - rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) fwr_mcpair); -} - - -static void -ReadRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t * prd_mcpair, - caddr_t parityBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** prd_dag_h, - RF_AllocListElem_t ** prd_alloclist, - RF_PhysDiskAddr_t ** prd_pda) -{ - /* Initiate the read region parity from disk. Once initiated, return - * to the calling routine. - * - * NON-BLOCKING */ - - RF_AccTraceEntry_t *tracerec; - RF_DagNode_t *prd_rdNode; - - /* create DAG to read region parity from disk */ - rf_MakeAllocList(*prd_alloclist); - *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, - rf_DiskReadUndoFunc, "Rrp", - *prd_alloclist, RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *prd_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), - &((*prd_pda)->col), &((*prd_pda)->startSector), - &((*prd_pda)->numSector)); - if (rf_parityLogDebug) - printf("[reading %d sectors of parity from region %d]\n", - (int) (*prd_pda)->numSector, regionID); - if ((*prd_pda)->next) { - (*prd_pda)->next = NULL; - printf("set prd_pda->next to NULL\n"); - } - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*prd_dag_h)->tracerec = tracerec; - prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; - prd_rdNode->params[0].p = *prd_pda; - prd_rdNode->params[1].p = parityBuffer; - prd_rdNode->params[2].v = 0; - prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - if (rf_validateDAGDebug) - rf_ValidateDAG(*prd_dag_h); - /* launch region parity read dag */ - rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) prd_mcpair); -} - -static void -WriteRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t * pwr_mcpair, - caddr_t parityBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** pwr_dag_h, - RF_AllocListElem_t ** pwr_alloclist, - RF_PhysDiskAddr_t ** pwr_pda) -{ - /* Initiate the write of region parity to disk. Once initiated, return - * to the calling routine. - * - * NON-BLOCKING */ - - RF_AccTraceEntry_t *tracerec; - RF_DagNode_t *pwr_wrNode; - - /* create DAG to write region log from disk */ - rf_MakeAllocList(*pwr_alloclist); - *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wrp", *pwr_alloclist, - RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ - *pwr_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), - &((*pwr_pda)->col), &((*pwr_pda)->startSector), - &((*pwr_pda)->numSector)); - - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); - (*pwr_dag_h)->tracerec = tracerec; - pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; - pwr_wrNode->params[0].p = *pwr_pda; -/* pwr_wrNode->params[1] = parityBuffer; */ - pwr_wrNode->params[2].v = 0; - pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); - - /* launch the dag to write region parity to disk */ - rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) pwr_mcpair); -} - -static void -FlushLogsToDisk( - RF_Raid_t * raidPtr, - RF_ParityLog_t * logList) -{ - /* Flush a linked list of core logs to the log disk. Logs contain the - * disk location where they should be written. Logs were written in - * FIFO order and that order must be preserved. - * - * Recommended optimizations: 1) allow multiple flushes to occur - * simultaneously 2) coalesce contiguous flush operations - * - * BLOCKING */ - - RF_ParityLog_t *log; - RF_RegionId_t regionID; - RF_MCPair_t *fwr_mcpair; - RF_DagHeader_t *fwr_dag_h; - RF_AllocListElem_t *fwr_alloclist; - RF_PhysDiskAddr_t *fwr_pda; - - fwr_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(fwr_mcpair->mutex); - - RF_ASSERT(logList); - log = logList; - while (log) { - regionID = log->regionID; - - /* create and launch a DAG to write the core log */ - if (rf_parityLogDebug) - printf("[initiating write of core log for region %d]\n", regionID); - fwr_mcpair->flag = RF_FALSE; - WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, - &fwr_alloclist, &fwr_pda); - - /* wait for the DAG to complete */ - while (!fwr_mcpair->flag) - RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex); - if (fwr_dag_h->status != rf_enable) { - RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); - RF_ASSERT(0); - } - /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(fwr_pda); - rf_FreeDAG(fwr_dag_h); - rf_FreeAllocList(fwr_alloclist); - - log = log->next; - } - RF_UNLOCK_MUTEX(fwr_mcpair->mutex); - rf_FreeMCPair(fwr_mcpair); - rf_ReleaseParityLogs(raidPtr, logList); -} - -static void -ReintegrateRegion( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_ParityLog_t * coreLog) -{ - RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; - RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h; - RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist; - RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda; - caddr_t parityBuffer, regionBuffer = NULL; - - /* Reintegrate a region (regionID). - * - * 1. acquire region and parity buffers - * 2. read log from disk - * 3. read parity from disk - * 4. apply log to parity - * 5. apply core log to parity - * 6. write new parity to disk - * - * BLOCKING */ - - if (rf_parityLogDebug) - printf("[reintegrating region %d]\n", regionID); - - /* initiate read of region parity */ - if (rf_parityLogDebug) - printf("[initiating read of parity for region %d]\n",regionID); - parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); - prd_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(prd_mcpair->mutex); - prd_mcpair->flag = RF_FALSE; - ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, - &prd_dag_h, &prd_alloclist, &prd_pda); - - /* if region log nonempty, initiate read */ - if (raidPtr->regionInfo[regionID].diskCount > 0) { - if (rf_parityLogDebug) - printf("[initiating read of disk log for region %d]\n", - regionID); - regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); - rrd_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(rrd_mcpair->mutex); - rrd_mcpair->flag = RF_FALSE; - ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, - &rrd_dag_h, &rrd_alloclist, &rrd_pda); - } - /* wait on read of region parity to complete */ - while (!prd_mcpair->flag) { - RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex); - } - RF_UNLOCK_MUTEX(prd_mcpair->mutex); - if (prd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to read parity from disk\n"); - /* add code to fail the parity disk */ - RF_ASSERT(0); - } - /* apply core log to parity */ - /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ - - if (raidPtr->regionInfo[regionID].diskCount > 0) { - /* wait on read of region log to complete */ - while (!rrd_mcpair->flag) - RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex); - RF_UNLOCK_MUTEX(rrd_mcpair->mutex); - if (rrd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to read region log from disk\n"); - /* add code to fail the log disk */ - RF_ASSERT(0); - } - /* apply region log to parity */ - /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ - /* release resources associated with region log */ - /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(rrd_pda); - rf_FreeDAG(rrd_dag_h); - rf_FreeAllocList(rrd_alloclist); - rf_FreeMCPair(rrd_mcpair); - ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); - } - /* write reintegrated parity to disk */ - if (rf_parityLogDebug) - printf("[initiating write of parity for region %d]\n", - regionID); - pwr_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(pwr_mcpair->mutex); - pwr_mcpair->flag = RF_FALSE; - WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, - &pwr_dag_h, &pwr_alloclist, &pwr_pda); - while (!pwr_mcpair->flag) - RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); - RF_UNLOCK_MUTEX(pwr_mcpair->mutex); - if (pwr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to write parity to disk\n"); - /* add code to fail the parity disk */ - RF_ASSERT(0); - } - /* release resources associated with read of old parity */ - /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(prd_pda); - rf_FreeDAG(prd_dag_h); - rf_FreeAllocList(prd_alloclist); - rf_FreeMCPair(prd_mcpair); - - /* release resources associated with write of new parity */ - ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); - /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(pwr_pda); - rf_FreeDAG(pwr_dag_h); - rf_FreeAllocList(pwr_alloclist); - rf_FreeMCPair(pwr_mcpair); - - if (rf_parityLogDebug) - printf("[finished reintegrating region %d]\n", regionID); -} - - - -static void -ReintegrateLogs( - RF_Raid_t * raidPtr, - RF_ParityLog_t * logList) -{ - RF_ParityLog_t *log, *freeLogList = NULL; - RF_ParityLogData_t *logData, *logDataList; - RF_RegionId_t regionID; - - RF_ASSERT(logList); - while (logList) { - log = logList; - logList = logList->next; - log->next = NULL; - regionID = log->regionID; - ReintegrateRegion(raidPtr, regionID, log); - log->numRecords = 0; - - /* remove all items which are blocked on reintegration of this - * region */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, - &raidPtr->parityLogDiskQueue.reintBlockHead, - &raidPtr->parityLogDiskQueue.reintBlockTail, - RF_TRUE); - logDataList = logData; - while (logData) { - logData->next = rf_SearchAndDequeueParityLogData( - raidPtr, regionID, - &raidPtr->parityLogDiskQueue.reintBlockHead, - &raidPtr->parityLogDiskQueue.reintBlockTail, - RF_TRUE); - logData = logData->next; - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* process blocked log data and clear reintInProgress flag for - * this region */ - if (logDataList) - rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); - else { - /* Enable flushing for this region. Holding both - * locks provides a synchronization barrier with - * DumpParityLogToDisk */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->regionInfo[regionID].diskCount = 0; - raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now - * enabled */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - /* if log wasn't used, attach it to the list of logs to be - * returned */ - if (log) { - log->next = freeLogList; - freeLogList = log; - } - } - if (freeLogList) - rf_ReleaseParityLogs(raidPtr, freeLogList); -} - -int -rf_ShutdownLogging(RF_Raid_t * raidPtr) -{ - /* shutdown parity logging 1) disable parity logging in all regions 2) - * reintegrate all regions */ - - RF_SectorCount_t diskCount; - RF_RegionId_t regionID; - RF_ParityLog_t *log; - - if (rf_parityLogDebug) - printf("[shutting down parity logging]\n"); - /* Since parity log maps are volatile, we must reintegrate all - * regions. */ - if (rf_forceParityLogReint) { - for (regionID = 0; regionID < rf_numParityRegions; regionID++) { - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = - RF_FALSE; - log = raidPtr->regionInfo[regionID].coreLog; - raidPtr->regionInfo[regionID].coreLog = NULL; - diskCount = raidPtr->regionInfo[regionID].diskCount; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - if (diskCount > 0 || log != NULL) - ReintegrateRegion(raidPtr, regionID, log); - if (log != NULL) - rf_ReleaseParityLogs(raidPtr, log); - } - } - if (rf_parityLogDebug) { - printf("[parity logging disabled]\n"); - printf("[should be done!]\n"); - } - return (0); -} - -int -rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) -{ - RF_ParityLog_t *reintQueue, *flushQueue; - int workNeeded, done = RF_FALSE; - int s; - - /* Main program for parity logging disk thread. This routine waits - * for work to appear in either the flush or reintegration queues and - * is responsible for flushing core logs to the log disk as well as - * reintegrating parity regions. - * - * BLOCKING */ - - s = splbio(); - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* - * Inform our creator that we're running. Don't bother doing the - * mutex lock/unlock dance- we locked above, and we'll unlock - * below with nothing to do, yet. - */ - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - - /* empty the work queues */ - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - - while (!done) { - while (workNeeded) { - /* First, flush all logs in the flush queue, freeing - * buffers Second, reintegrate all regions which are - * reported as full. Third, append queued log data - * until blocked. - * - * Note: Incoming appends (ParityLogAppend) can block on - * either 1. empty buffer pool 2. region under - * reintegration To preserve a global FIFO ordering of - * appends, buffers are not released to the world - * until those appends blocked on buffers are removed - * from the append queue. Similarly, regions which - * are reintegrated are not opened for general use - * until the append queue has been emptied. */ - - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* empty flushQueue, using free'd log buffers to - * process bufTail */ - if (flushQueue) - FlushLogsToDisk(raidPtr, flushQueue); - - /* empty reintQueue, flushing from reintTail as we go */ - if (reintQueue) - ReintegrateLogs(raidPtr, reintQueue); - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - } - /* no work is needed at this point */ - if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { - /* shutdown parity logging 1. disable parity logging - * in all regions 2. reintegrate all regions */ - done = RF_TRUE; /* thread disabled, no work needed */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - rf_ShutdownLogging(raidPtr); - } - if (!done) { - /* thread enabled, no work needed, so sleep */ - if (rf_parityLogDebug) - printf("[parity logging disk manager sleeping]\n"); - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, - raidPtr->parityLogDiskQueue.mutex); - if (rf_parityLogDebug) - printf("[parity logging disk manager just woke up]\n"); - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - } - } - /* - * Announce that we're done. - */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - - splx(s); - - /* - * In the NetBSD kernel, the thread must exit; returning would - * cause the proc trampoline to attempt to return to userspace. - */ - kthread_exit(0); /* does not return */ -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.h b/sys/dev/raidframe/rf_paritylogDiskMgr.h deleted file mode 100644 index bdcc2a5..0000000 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.h +++ /dev/null @@ -1,42 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_paritylogDiskMgr.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for parity log disk mgr code - * - */ - -#ifndef _RF__RF_PARITYLOGDISKMGR_H_ -#define _RF__RF_PARITYLOGDISKMGR_H_ - -#include <dev/raidframe/rf_types.h> - -int rf_ShutdownLogging(RF_Raid_t * raidPtr); -int rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr); - -#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */ diff --git a/sys/dev/raidframe/rf_paritylogging.c b/sys/dev/raidframe/rf_paritylogging.c deleted file mode 100644 index 2f9cf5e..0000000 --- a/sys/dev/raidframe/rf_paritylogging.c +++ /dev/null @@ -1,1076 +0,0 @@ -/* $NetBSD: rf_paritylogging.c,v 1.10 2000/02/12 16:06:27 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - - -/* - parity logging configuration, dag selection, and mapping is implemented here - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_paritylogDiskMgr.h> -#include <dev/raidframe/rf_paritylogging.h> -#include <dev/raidframe/rf_parityloggingdags.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -typedef struct RF_ParityLoggingConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_ParityLoggingConfigInfo_t; - -static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); -static void rf_ShutdownParityLogging(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); - -int -rf_ConfigureParityLogging( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int i, j, startdisk, rc; - RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; - RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ParityLoggingConfigInfo_t *info; - RF_ParityLog_t *l = NULL, *next; - caddr_t lHeapPtr; - - if (rf_numParityRegions <= 0) - return(EINVAL); - - /* - * We create multiple entries on the shutdown list here, since - * this configuration routine is fairly complicated in and of - * itself, and this makes backing out of a failed configuration - * much simpler. - */ - - raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; - - /* create a parity logging configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), - (RF_ParityLoggingConfigInfo_t *), - raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ - info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), - (raidPtr->numCol), - raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - - startdisk = 0; - for (i = 0; i < (raidPtr->numCol); i++) { - for (j = 0; j < (raidPtr->numCol); j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % - (raidPtr->numCol - 1); - } - if ((--startdisk) < 0) - startdisk = raidPtr->numCol - 1 - 1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << - raidPtr->logBytesPerSector; - layoutPtr->numParityCol = 1; - layoutPtr->numParityLogCol = 1; - layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - - layoutPtr->numParityLogCol; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * - layoutPtr->sectorsPerStripeUnit; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * - layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * - layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - /* configure parity log parameters - * - * parameter comment/constraints - * ------------------------------------------- - * numParityRegions* all regions (except possibly last) - * of equal size - * totalInCoreLogCapacity* amount of memory in bytes available - * for in-core logs (default 1 MB) - * numSectorsPerLog# capacity of an in-core log in sectors - * (1 * disk track) - * numParityLogs total number of in-core logs, - * should be at least numParityRegions - * regionLogCapacity size of a region log (except possibly - * last one) in sectors - * totalLogCapacity total amount of log space in sectors - * - * where '*' denotes a user settable parameter. - * Note that logs are fixed to be the size of a disk track, - * value #defined in rf_paritylog.h - * - */ - - totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; - raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; - if (rf_parityLogDebug) - printf("bytes per sector %d\n", raidPtr->bytesPerSector); - - /* reduce fragmentation within a disk region by adjusting the number - * of regions in an attempt to allow an integral number of logs to fit - * into a disk region */ - fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; - if (fragmentation > 0) - for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { - if (((totalLogCapacity / (rf_numParityRegions + i)) % - raidPtr->numSectorsPerLog) < fragmentation) { - rf_numParityRegions++; - raidPtr->regionLogCapacity = totalLogCapacity / - rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % - raidPtr->numSectorsPerLog; - } - if (((totalLogCapacity / (rf_numParityRegions - i)) % - raidPtr->numSectorsPerLog) < fragmentation) { - rf_numParityRegions--; - raidPtr->regionLogCapacity = totalLogCapacity / - rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % - raidPtr->numSectorsPerLog; - } - } - /* ensure integral number of regions per log */ - raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / - raidPtr->numSectorsPerLog) * - raidPtr->numSectorsPerLog; - - raidPtr->numParityLogs = rf_totalInCoreLogCapacity / - (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); - /* to avoid deadlock, must ensure that enough logs exist for each - * region to have one simultaneously */ - if (raidPtr->numParityLogs < rf_numParityRegions) - raidPtr->numParityLogs = rf_numParityRegions; - - /* create region information structs */ - printf("Allocating %d bytes for in-core parity region info\n", - (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); - RF_Malloc(raidPtr->regionInfo, - (rf_numParityRegions * sizeof(RF_RegionInfo_t)), - (RF_RegionInfo_t *)); - if (raidPtr->regionInfo == NULL) - return (ENOMEM); - - /* last region may not be full capacity */ - lastRegionCapacity = raidPtr->regionLogCapacity; - while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + - lastRegionCapacity > totalLogCapacity) - lastRegionCapacity = lastRegionCapacity - - raidPtr->numSectorsPerLog; - - raidPtr->regionParityRange = raidPtr->sectorsPerDisk / - rf_numParityRegions; - maxRegionParityRange = raidPtr->regionParityRange; - -/* i can't remember why this line is in the code -wvcii 6/30/95 */ -/* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) - regionParityRange++; */ - - /* build pool of unused parity logs */ - printf("Allocating %d bytes for %d parity logs\n", - raidPtr->numParityLogs * raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector, - raidPtr->numParityLogs); - RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, - (caddr_t)); - if (raidPtr->parityLogBufferHeap == NULL) - return (ENOMEM); - lHeapPtr = raidPtr->parityLogBufferHeap; - rc = rf_mutex_init(&raidPtr->parityLogPool.mutex, "RF_PARITYLOGGING1"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - return (ENOMEM); - } - for (i = 0; i < raidPtr->numParityLogs; i++) { - if (i == 0) { - RF_Calloc(raidPtr->parityLogPool.parityLogs, 1, - sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); - if (raidPtr->parityLogPool.parityLogs == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector); - return (ENOMEM); - } - l = raidPtr->parityLogPool.parityLogs; - } else { - RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t), - (RF_ParityLog_t *)); - if (l->next == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector); - for (l = raidPtr->parityLogPool.parityLogs; - l; - l = next) { - next = l->next; - if (l->records) - RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); - RF_Free(l, sizeof(RF_ParityLog_t)); - } - return (ENOMEM); - } - l = l->next; - } - l->bufPtr = lHeapPtr; - lHeapPtr += raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector; - RF_Malloc(l->records, (raidPtr->numSectorsPerLog * - sizeof(RF_ParityLogRecord_t)), - (RF_ParityLogRecord_t *)); - if (l->records == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector); - for (l = raidPtr->parityLogPool.parityLogs; - l; - l = next) { - next = l->next; - if (l->records) - RF_Free(l->records, - (raidPtr->numSectorsPerLog * - sizeof(RF_ParityLogRecord_t))); - RF_Free(l, sizeof(RF_ParityLog_t)); - } - return (ENOMEM); - } - } - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingPool(raidPtr); - return (rc); - } - /* build pool of region buffers */ - rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex, "RF_PARITYLOGGING3"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (ENOMEM); - } - rc = rf_cond_init(&raidPtr->regionBufferPool.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - return (ENOMEM); - } - raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * - raidPtr->bytesPerSector; - printf("regionBufferPool.bufferSize %d\n", - raidPtr->regionBufferPool.bufferSize); - - /* for now, only one region at a time may be reintegrated */ - raidPtr->regionBufferPool.totalBuffers = 1; - - raidPtr->regionBufferPool.availableBuffers = - raidPtr->regionBufferPool.totalBuffers; - raidPtr->regionBufferPool.availBuffersIndex = 0; - raidPtr->regionBufferPool.emptyBuffersIndex = 0; - printf("Allocating %d bytes for regionBufferPool\n", - (int) (raidPtr->regionBufferPool.totalBuffers * - sizeof(caddr_t))); - RF_Malloc(raidPtr->regionBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), - (caddr_t *)); - if (raidPtr->regionBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - rf_cond_destroy(&raidPtr->regionBufferPool.cond); - return (ENOMEM); - } - for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { - printf("Allocating %d bytes for regionBufferPool#%d\n", - (int) (raidPtr->regionBufferPool.bufferSize * - sizeof(char)), i); - RF_Malloc(raidPtr->regionBufferPool.buffers[i], - raidPtr->regionBufferPool.bufferSize * sizeof(char), - (caddr_t)); - if (raidPtr->regionBufferPool.buffers[i] == NULL) { - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - rf_cond_destroy(&raidPtr->regionBufferPool.cond); - for (j = 0; j < i; j++) { - RF_Free(raidPtr->regionBufferPool.buffers[i], - raidPtr->regionBufferPool.bufferSize * - sizeof(char)); - } - RF_Free(raidPtr->regionBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * - sizeof(caddr_t)); - return (ENOMEM); - } - printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, - (long) raidPtr->regionBufferPool.buffers[i]); - } - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingRegionBufferPool, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingRegionBufferPool(raidPtr); - return (rc); - } - /* build pool of parity buffers */ - parityBufferCapacity = maxRegionParityRange; - rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex, "RF_PARITYLOGGING3"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (rc); - } - rc = rf_cond_init(&raidPtr->parityBufferPool.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - return (ENOMEM); - } - raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * - raidPtr->bytesPerSector; - printf("parityBufferPool.bufferSize %d\n", - raidPtr->parityBufferPool.bufferSize); - - /* for now, only one region at a time may be reintegrated */ - raidPtr->parityBufferPool.totalBuffers = 1; - - raidPtr->parityBufferPool.availableBuffers = - raidPtr->parityBufferPool.totalBuffers; - raidPtr->parityBufferPool.availBuffersIndex = 0; - raidPtr->parityBufferPool.emptyBuffersIndex = 0; - printf("Allocating %d bytes for parityBufferPool of %d units\n", - (int) (raidPtr->parityBufferPool.totalBuffers * - sizeof(caddr_t)), - raidPtr->parityBufferPool.totalBuffers ); - RF_Malloc(raidPtr->parityBufferPool.buffers, - raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), - (caddr_t *)); - if (raidPtr->parityBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - rf_cond_destroy(&raidPtr->parityBufferPool.cond); - return (ENOMEM); - } - for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { - printf("Allocating %d bytes for parityBufferPool#%d\n", - (int) (raidPtr->parityBufferPool.bufferSize * - sizeof(char)),i); - RF_Malloc(raidPtr->parityBufferPool.buffers[i], - raidPtr->parityBufferPool.bufferSize * sizeof(char), - (caddr_t)); - if (raidPtr->parityBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - rf_cond_destroy(&raidPtr->parityBufferPool.cond); - for (j = 0; j < i; j++) { - RF_Free(raidPtr->parityBufferPool.buffers[i], - raidPtr->regionBufferPool.bufferSize * - sizeof(char)); - } - RF_Free(raidPtr->parityBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * - sizeof(caddr_t)); - return (ENOMEM); - } - printf("parityBufferPool.buffers[%d] = %lx\n", i, - (long) raidPtr->parityBufferPool.buffers[i]); - } - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingParityBufferPool, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingParityBufferPool(raidPtr); - return (rc); - } - /* initialize parityLogDiskQueue */ - rc = rf_create_managed_mutex(listp, - &raidPtr->parityLogDiskQueue.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (rc); - } - rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (rc); - } - raidPtr->parityLogDiskQueue.flushQueue = NULL; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - raidPtr->parityLogDiskQueue.bufHead = NULL; - raidPtr->parityLogDiskQueue.bufTail = NULL; - raidPtr->parityLogDiskQueue.reintHead = NULL; - raidPtr->parityLogDiskQueue.reintTail = NULL; - raidPtr->parityLogDiskQueue.logBlockHead = NULL; - raidPtr->parityLogDiskQueue.logBlockTail = NULL; - raidPtr->parityLogDiskQueue.reintBlockHead = NULL; - raidPtr->parityLogDiskQueue.reintBlockTail = NULL; - raidPtr->parityLogDiskQueue.freeDataList = NULL; - raidPtr->parityLogDiskQueue.freeCommonList = NULL; - - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingDiskQueue, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (rc); - } - for (i = 0; i < rf_numParityRegions; i++) { - rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex, "RF_PARITYLOGGING3"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * - sizeof(RF_RegionInfo_t))); - return (ENOMEM); - } - rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex, "RF_PARITYLOGGING4"); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); - for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * - sizeof(RF_RegionInfo_t))); - return (ENOMEM); - } - raidPtr->regionInfo[i].reintInProgress = RF_FALSE; - raidPtr->regionInfo[i].regionStartAddr = - raidPtr->regionLogCapacity * i; - raidPtr->regionInfo[i].parityStartAddr = - raidPtr->regionParityRange * i; - if (i < rf_numParityRegions - 1) { - raidPtr->regionInfo[i].capacity = - raidPtr->regionLogCapacity; - raidPtr->regionInfo[i].numSectorsParity = - raidPtr->regionParityRange; - } else { - raidPtr->regionInfo[i].capacity = - lastRegionCapacity; - raidPtr->regionInfo[i].numSectorsParity = - raidPtr->sectorsPerDisk - - raidPtr->regionParityRange * i; - if (raidPtr->regionInfo[i].numSectorsParity > - maxRegionParityRange) - maxRegionParityRange = - raidPtr->regionInfo[i].numSectorsParity; - } - raidPtr->regionInfo[i].diskCount = 0; - RF_ASSERT(raidPtr->regionInfo[i].capacity + - raidPtr->regionInfo[i].regionStartAddr <= - totalLogCapacity); - RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + - raidPtr->regionInfo[i].numSectorsParity <= - raidPtr->sectorsPerDisk); - printf("Allocating %d bytes for region %d\n", - (int) (raidPtr->regionInfo[i].capacity * - sizeof(RF_DiskMap_t)), i); - RF_Malloc(raidPtr->regionInfo[i].diskMap, - (raidPtr->regionInfo[i].capacity * - sizeof(RF_DiskMap_t)), - (RF_DiskMap_t *)); - if (raidPtr->regionInfo[i].diskMap == NULL) { - rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex); - for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * - sizeof(RF_RegionInfo_t))); - return (ENOMEM); - } - raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; - raidPtr->regionInfo[i].coreLog = NULL; - } - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingRegionInfo, - raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingRegionInfo(raidPtr); - return (rc); - } - RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); - raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; - rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, - rf_ParityLoggingDiskManager, raidPtr,"rf_log"); - if (rc) { - raidPtr->parityLogDiskQueue.threadState = 0; - RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return (ENOMEM); - } - /* wait for thread to start */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, - raidPtr->parityLogDiskQueue.mutex); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); - if (rc) { - RF_ERRORMSG1("Got rc=%d adding parity logging shutdown event\n", rc); - rf_ShutdownParityLogging(raidPtr); - return (rc); - } - if (rf_parityLogDebug) { - printf(" size of disk log in sectors: %d\n", - (int) totalLogCapacity); - printf(" total number of parity regions is %d\n", (int) rf_numParityRegions); - printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity); - printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation); - printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); - printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); - printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); - } - rf_EnableParityLogging(raidPtr); - - return (0); -} - -static void -FreeRegionInfo( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID) -{ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_Free(raidPtr->regionInfo[regionID].diskMap, - (raidPtr->regionInfo[regionID].capacity * - sizeof(RF_DiskMap_t))); - if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { - rf_ReleaseParityLogs(raidPtr, - raidPtr->regionInfo[regionID].coreLog); - raidPtr->regionInfo[regionID].coreLog = NULL; - } else { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); - RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex); -} - - -static void -FreeParityLogQueue( - RF_Raid_t * raidPtr, - RF_ParityLogQueue_t * queue) -{ - RF_ParityLog_t *l1, *l2; - - RF_LOCK_MUTEX(queue->mutex); - l1 = queue->parityLogs; - while (l1) { - l2 = l1; - l1 = l2->next; - RF_Free(l2->records, (raidPtr->numSectorsPerLog * - sizeof(RF_ParityLogRecord_t))); - RF_Free(l2, sizeof(RF_ParityLog_t)); - } - RF_UNLOCK_MUTEX(queue->mutex); - rf_mutex_destroy(&queue->mutex); -} - - -static void -FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) -{ - int i; - - RF_LOCK_MUTEX(queue->mutex); - if (queue->availableBuffers != queue->totalBuffers) { - printf("Attempt to free region queue which is still in use!\n"); - RF_ASSERT(0); - } - for (i = 0; i < queue->totalBuffers; i++) - RF_Free(queue->buffers[i], queue->bufferSize); - RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t)); - RF_UNLOCK_MUTEX(queue->mutex); - rf_mutex_destroy(&queue->mutex); -} - -static void -rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - RF_RegionId_t i; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingRegionInfo\n", - raidPtr->raidid); - } - /* free region information structs */ - for (i = 0; i < rf_numParityRegions; i++) - FreeRegionInfo(raidPtr, i); - RF_Free(raidPtr->regionInfo, (rf_numParityRegions * - sizeof(raidPtr->regionInfo))); - raidPtr->regionInfo = NULL; -} - -static void -rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid); - } - /* free contents of parityLogPool */ - FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); -} - -static void -rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingRegionBufferPool\n", - raidPtr->raidid); - } - FreeRegionBufferQueue(&raidPtr->regionBufferPool); -} - -static void -rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingParityBufferPool\n", - raidPtr->raidid); - } - FreeRegionBufferQueue(&raidPtr->parityBufferPool); -} - -static void -rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) -{ - RF_ParityLogData_t *d; - RF_CommonLogData_t *c; - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingDiskQueue\n", - raidPtr->raidid); - } - /* free disk manager stuff */ - RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); - while (raidPtr->parityLogDiskQueue.freeDataList) { - d = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = - raidPtr->parityLogDiskQueue.freeDataList->next; - RF_Free(d, sizeof(RF_ParityLogData_t)); - } - while (raidPtr->parityLogDiskQueue.freeCommonList) { - c = raidPtr->parityLogDiskQueue.freeCommonList; - rf_mutex_destroy(&c->mutex); - raidPtr->parityLogDiskQueue.freeCommonList = - raidPtr->parityLogDiskQueue.freeCommonList->next; - RF_Free(c, sizeof(RF_CommonLogData_t)); - } -} - -static void -rf_ShutdownParityLogging(RF_ThreadArg_t arg) -{ - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *) arg; - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid); - } - /* shutdown disk thread */ - /* This has the desirable side-effect of forcing all regions to be - * reintegrated. This is necessary since all parity log maps are - * currently held in volatile memory. */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - /* - * pLogDiskThread will now terminate when queues are cleared - * now wait for it to be done - */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, - raidPtr->parityLogDiskQueue.mutex); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLogging done (thread completed)\n", raidPtr->raidid); - } -} - -int -rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) -{ - return (10); -} -/* return the region ID for a given RAID address */ -RF_RegionId_t -rf_MapRegionIDParityLogging( - RF_Raid_t * raidPtr, - RF_SectorNum_t address) -{ - RF_RegionId_t regionID; - -/* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ - regionID = address / raidPtr->regionParityRange; - if (regionID == rf_numParityRegions) { - /* last region may be larger than other regions */ - regionID--; - } - RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + - raidPtr->regionInfo[regionID].numSectorsParity); - RF_ASSERT(regionID < rf_numParityRegions); - return (regionID); -} - - -/* given a logical RAID sector, determine physical disk address of data */ -void -rf_MapSectorParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / - raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - /* *col = (SUID % (raidPtr->numCol - - * raidPtr->Layout.numParityLogCol)); */ - *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * - raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* given a logical RAID sector, determine physical disk address of parity */ -void -rf_MapParityParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / - raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - /* *col = - * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt - * r->numCol - raidPtr->Layout.numParityLogCol); */ - *col = raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * - raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* given a regionID and sector offset, determine the physical disk address of the parity log */ -void -rf_MapLogParityLogging( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * startSector) -{ - *row = 0; - *col = raidPtr->numCol - 1; - *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; -} - - -/* given a regionID, determine the physical disk address of the logged - parity for that region */ -void -rf_MapRegionParity( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * startSector, - RF_SectorCount_t * numSector) -{ - *row = 0; - *col = raidPtr->numCol - 2; - *startSector = raidPtr->regionInfo[regionID].parityStartAddr; - *numSector = raidPtr->regionInfo[regionID].numSectorsParity; -} - - -/* given a logical RAID address, determine the participating disks in - the stripe */ -void -rf_IdentifyStripeParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, - addr); - RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) - raidPtr->Layout.layoutSpecificInfo; - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; -} - - -void -rf_MapSIDToPSIDParityLogging( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} - - -/* select an algorithm for performing an access. Returns two pointers, - * one to a function that will return information about the DAG, and - * another to a function that will create the dag. - */ -void -rf_ParityLoggingDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmp, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA = NULL; - RF_RowCol_t frow, fcol; - RF_RowStatus_t rstat; - int prior_recon; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmp->numDataFailed + asmp->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } else - if (asmp->numDataFailed + asmp->numParityFailed == 1) { - - /* if under recon & already reconstructed, redirect - * the access to the spare drive and eliminate the - * failure indication */ - failedPDA = asmp->failedPDAs[0]; - frow = failedPDA->row; - fcol = failedPDA->col; - rstat = raidPtr->status[failedPDA->row]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; - RF_SectorNum_t oo = failedPDA->startSector; - if (layoutPtr->map->flags & - RF_DISTRIBUTE_SPARE) { - /* redirect to dist spare space */ - - if (failedPDA == asmp->parityInfo) { - - /* parity has failed */ - (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmp->parityInfo->next) { /* redir 2nd component, - * if any */ - RF_PhysDiskAddr_t *p = asmp->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; - p->row = failedPDA->row; - p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: - * startSector is not - * really a RAID address */ - } - } else - if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { - RF_ASSERT(0); /* should not ever - * happen */ - } else { - - /* data has failed */ - (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - } - - } else { - /* redirect to dedicated spare space */ - - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - - /* the parity may have two distinct - * components, both of which may need - * to be redirected */ - if (asmp->parityInfo->next) { - if (failedPDA == asmp->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else - if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ - asmp->parityInfo->row = failedPDA->row; - asmp->parityInfo->col = failedPDA->col; - } - } - } - - RF_ASSERT(failedPDA->col != -1); - - if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, (long) failedPDA->startSector); - } - asmp->numDataFailed = asmp->numParityFailed = 0; - } - } - if (type == RF_IO_TYPE_READ) { - - if (asmp->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; - - } else { - - - /* if mirroring, always use large writes. If the access - * requires two distinct parity updates, always do a small - * write. If the stripe contains a failure but the access - * does not, do a small write. The first conditional - * (numStripeUnitsAccessed <= numDataCol/2) uses a - * less-than-or-equal rather than just a less-than because - * when G is 3 or 4, numDataCol/2 is 1, and I want - * single-stripe-unit updates to use just one disk. */ - if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { - if (((asmp->numStripeUnitsAccessed <= - (layoutPtr->numDataCol / 2)) && - (layoutPtr->numDataCol != 1)) || - (asmp->parityInfo->next != NULL) || - rf_CheckStripeForFailures(raidPtr, asmp)) { - *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; - } else - if (asmp->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; - else - if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; - } -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogging.h b/sys/dev/raidframe/rf_paritylogging.h deleted file mode 100644 index 5b7dd25..0000000 --- a/sys/dev/raidframe/rf_paritylogging.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_paritylogging.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for Parity Logging */ - -#ifndef _RF__RF_PARITYLOGGING_H_ -#define _RF__RF_PARITYLOGGING_H_ - -int -rf_ConfigureParityLogging(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr); -RF_RegionId_t -rf_MapRegionIDParityLogging(RF_Raid_t * raidPtr, - RF_SectorNum_t address); -void -rf_MapSectorParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, - int remap); -void -rf_MapParityParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, - int remap); -void -rf_MapLogParityLogging(RF_Raid_t * raidPtr, RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, RF_RowCol_t * row, RF_RowCol_t * col, - RF_SectorNum_t * startSector); -void -rf_MapRegionParity(RF_Raid_t * raidPtr, RF_RegionId_t regionID, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * startSector, - RF_SectorCount_t * numSector); -void -rf_IdentifyStripeParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_ParityLoggingDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_PARITYLOGGING_H_ */ diff --git a/sys/dev/raidframe/rf_parityloggingdags.c b/sys/dev/raidframe/rf_parityloggingdags.c deleted file mode 100644 index 30a5892..0000000 --- a/sys/dev/raidframe/rf_parityloggingdags.c +++ /dev/null @@ -1,675 +0,0 @@ -/* $NetBSD: rf_parityloggingdags.c,v 1.4 2000/01/07 03:41:04 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 - -/* - DAGs specific to parity logging are created here - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_paritylog.h> -#include <dev/raidframe/rf_memchunk.h> -#include <dev/raidframe/rf_general.h> - -#include <dev/raidframe/rf_parityloggingdags.h> - -/****************************************************************************** - * - * creates a DAG to perform a large-write operation: - * - * / Rod \ / Wnd \ - * H -- NIL- Rod - NIL - Wnd ------ NIL - T - * \ Rod / \ Xor - Lpo / - * - * The writes are not done until the reads complete because if they were done in - * parallel, a failure on one of the reads could leave the parity in an inconsistent - * state, so that the retry with a new DAG would produce erroneous parity. - * - * Note: this DAG has the nasty property that none of the buffers allocated for reading - * old data can be freed until the XOR node fires. Need to fix this. - * - * The last two arguments are the number of faults tolerated, and function for the - * redundancy calculation. The undo for the redundancy calc is assumed to be null - * - *****************************************************************************/ - -void -rf_CommonCreateParityLoggingLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *)) -{ - RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode, - *lpoNode, *blockNode, *unblockNode, *termNode; - int nWndNodes, nRodNodes, i; - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_AccessStripeMapHeader_t *new_asm_h[2]; - int nodeNum, asmNum; - RF_ReconUnitNum_t which_ru; - char *sosBuffer, *eosBuffer; - RF_PhysDiskAddr_t *pda; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating parity-logging large-write DAG]\n"); - RF_ASSERT(nfaults == 1);/* this arch only single fault tolerant */ - dag_h->creator = "ParityLoggingLargeWriteDAG"; - - /* alloc the Wnd nodes, the xor node, and the Lpo node */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; - i += nWndNodes; - xorNode = &nodes[i]; - i += 1; - lpoNode = &nodes[i]; - i += 1; - blockNode = &nodes[i]; - i += 1; - syncNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - termNode = &nodes[i]; - i += 1; - - dag_h->numCommitNodes = nWndNodes + 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - - /* begin node initialization */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - nodeNum++; - pda = pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h, "Xr ", allocList); - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ - } - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get - * at RAID information */ - - /* look for an Rod node that reads a complete SU. If none, alloc a - * buffer to receive the parity info. Note that we can't use a new - * data buffer because it will not have gotten written when the xor - * occurs. */ - for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - if (i == nRodNodes) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - } else { - xorNode->results[0] = rodNodes[i].params[1].p; - } - - /* initialize the Lpo node */ - rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList); - - lpoNode->params[0].p = asmap->parityInfo; - lpoNode->params[1].p = xorNode->results[0]; - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ - - /* connect nodes to form graph */ - - /* connect dag header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - } - - /* connect the block node to the sync node */ - /* necessary if nRodNodes == 0 */ - RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1); - blockNode->succedents[nRodNodes] = syncNode; - syncNode->antecedents[0] = blockNode; - syncNode->antType[0] = rf_control; - - /* connect the Rod nodes to the syncNode */ - for (i = 0; i < nRodNodes; i++) { - rodNodes[i].succedents[0] = syncNode; - syncNode->antecedents[1 + i] = &rodNodes[i]; - syncNode->antType[1 + i] = rf_control; - } - - /* connect the sync node to the xor node */ - RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1); - RF_ASSERT(xorNode->numAntecedents == 1); - syncNode->succedents[0] = xorNode; - xorNode->antecedents[0] = syncNode; - xorNode->antType[0] = rf_trueData; /* carry forward from sync */ - - /* connect the sync node to the Wnd nodes */ - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - syncNode->succedents[1 + i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = syncNode; - wndNodes[i].antType[0] = rf_control; - } - - /* connect the xor node to the Lpo node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(lpoNode->numAntecedents == 1); - xorNode->succedents[0] = lpoNode; - lpoNode->antecedents[0] = xorNode; - lpoNode->antType[0] = rf_trueData; - - /* connect the Wnd nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNodes[i]; - unblockNode->antType[i] = rf_control; - } - - /* connect the Lpo node to the unblock node */ - RF_ASSERT(lpoNode->numSuccedents == 1); - lpoNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes] = lpoNode; - unblockNode->antType[nWndNodes] = rf_control; - - /* connect unblock node to terminator */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} - - - - -/****************************************************************************** - * - * creates a DAG to perform a small-write operation (either raid 5 or pq), which is as follows: - * - * Header - * | - * Block - * / | ... \ \ - * / | \ \ - * Rod Rod Rod Rop - * | \ /| \ / | \/ | - * | | | /\ | - * Wnd Wnd Wnd X - * | \ / | - * | \ / | - * \ \ / Lpo - * \ \ / / - * +-> Unblock <-+ - * | - * T - * - * - * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity. - * When the access spans a stripe unit boundary and is less than one SU in size, there will - * be two Rop -- X -- Wnp branches. I call this the "double-XOR" case. - * The second output from each Rod node goes to the X node. In the double-XOR - * case, there are exactly 2 Rod nodes, and each sends one output to one X node. - * There is one Rod -- Wnd -- T branch for each stripe unit being updated. - * - * The block and unblock nodes are unused. See comment above CreateFaultFreeReadDAG. - * - * Note: this DAG ignores all the optimizations related to making the RMWs atomic. - * it also has the nasty property that none of the buffers allocated for reading - * old data & parity can be freed until the XOR node fires. Need to fix this. - * - * A null qfuncs indicates single fault tolerant - *****************************************************************************/ - -void -rf_CommonCreateParityLoggingSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes; - RF_DagNode_t *readDataNodes, *readParityNodes; - RF_DagNode_t *writeDataNodes, *lpuNodes; - RF_DagNode_t *unlockDataNodes = NULL, *termNode; - RF_PhysDiskAddr_t *pda = asmap->physInfo; - int numDataNodes = asmap->numStripeUnitsAccessed; - int numParityNodes = (asmap->parityInfo->next) ? 2 : 1; - int i, j, nNodes, totalNumNodes; - RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node); - int (*qfunc) (RF_DagNode_t * node); - char *name, *qname; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - long nfaults = qfuncs ? 2 : 1; - int lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - if (rf_dagDebug) - printf("[Creating parity-logging small-write DAG]\n"); - RF_ASSERT(numDataNodes > 0); - RF_ASSERT(nfaults == 1); - dag_h->creator = "ParityLoggingSmallWriteDAG"; - - /* DAG creation occurs in three steps: 1. count the number of nodes in - * the DAG 2. create the nodes 3. initialize the nodes 4. connect the - * nodes */ - - /* Step 1. compute number of nodes in the graph */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node a read and Lpu for each - * parity unit a block and unblock node (2) a terminator node if - * atomic RMW an unlock node for each data unit, redundancy unit */ - totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3; - if (lu_flag) - totalNumNodes += numDataNodes; - - nNodes = numDataNodes + numParityNodes; - - dag_h->numCommitNodes = numDataNodes + numParityNodes; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; - i += 1; - unblockNode = &nodes[i]; - i += 1; - readDataNodes = &nodes[i]; - i += numDataNodes; - readParityNodes = &nodes[i]; - i += numParityNodes; - writeDataNodes = &nodes[i]; - i += numDataNodes; - lpuNodes = &nodes[i]; - i += numParityNodes; - xorNodes = &nodes[i]; - i += numParityNodes; - termNode = &nodes[i]; - i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; - i += numDataNodes; - } - RF_ASSERT(i == totalNumNodes); - - /* Step 3. initialize the nodes */ - /* initialize block node (Nil) */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize unblock node (Nil) */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList); - - /* initialize terminatory node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList); - RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * data */ - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - pda = pda->next; - readDataNodes[i].propList[0] = NULL; - readDataNodes[i].propList[1] = NULL; - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; - i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * parity */ - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - readParityNodes[i].propList[0] = NULL; - pda = pda->next; - } - - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new - * data to be written */ - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - - - /* initialize nodes which compute new parity */ - /* we use the simple XOR func in the double-XOR case, and when we're - * accessing only a portion of one stripe unit. the distinction - * between the two is that the regular XOR func assumes that the - * targbuf is a full SU in size, and examines the pda associated with - * the buffer to decide where within the buffer to XOR the data, - * whereas the simple XOR func just XORs the data into the start of - * the buffer. */ - if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } - } else { - func = pfuncs->regular; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->RegularName; - if (qfuncs) { - qfunc = qfuncs->regular; - qname = qfuncs->RegularName; - } - } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} - * nodes, and raidPtr */ - if (numParityNodes == 2) { /* double-xor case */ - for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for - * xor */ - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as - * target buf */ - } - } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - } - - /* initialize the log node(s) */ - pda = asmap->parityInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda); - rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList); - lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity */ - lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to - * parity */ - pda = pda->next; - } - - - /* Step 4. connect the nodes */ - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes)); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0] = blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect read old data nodes to write new data nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes); - for (j = 0; j < numDataNodes; j++) { - RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[j] = &writeDataNodes[j]; - writeDataNodes[j].antecedents[i] = &readDataNodes[i]; - if (i == j) - writeDataNodes[j].antType[i] = rf_antiData; - else - writeDataNodes[j].antType[i] = rf_control; - } - } - - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - - /* connect read old parity nodes to write new data nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes); - for (j = 0; j < numDataNodes; j++) { - readParityNodes[i].succedents[j] = &writeDataNodes[j]; - writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - writeDataNodes[j].antType[numDataNodes + i] = rf_control; - } - } - - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) - for (j = 0; j < numParityNodes; j++) { - readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - - /* connect xor nodes to write new parity nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(xorNodes[i].numSuccedents == 1); - RF_ASSERT(lpuNodes[i].numAntecedents == 1); - xorNodes[i].succedents[0] = &lpuNodes[i]; - lpuNodes[i].antecedents[0] = &xorNodes[i]; - lpuNodes[i].antType[0] = rf_trueData; - } - - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - unlockDataNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &unlockDataNodes[i]; - unblockNode->antType[i] = rf_control; - } else { - /* connect write new data nodes to unblock node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &writeDataNodes[i]; - unblockNode->antType[i] = rf_control; - } - } - - /* connect write new parity nodes to unblock node */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(lpuNodes[i].numSuccedents == 1); - lpuNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i]; - unblockNode->antType[numDataNodes + i] = rf_control; - } - - /* connect unblock node to terminator */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} - - -void -rf_CreateParityLoggingSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) -{ - dag_h->creator = "ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL); -} - - -void -rf_CreateParityLoggingLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *)) -{ - dag_h->creator = "ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc); -} -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_parityloggingdags.h b/sys/dev/raidframe/rf_parityloggingdags.h deleted file mode 100644 index dc0fc9b..0000000 --- a/sys/dev/raidframe/rf_parityloggingdags.h +++ /dev/null @@ -1,59 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_parityloggingdags.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************************************************** - * * - * rf_parityloggingdags.h -- header file for parity logging dags * - * * - ****************************************************************************/ - -#ifndef _RF__RF_PARITYLOGGINGDAGS_H_ -#define _RF__RF_PARITYLOGGINGDAGS_H_ - -/* routines that create DAGs */ -void -rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *)); - void rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - - void rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *)); - void rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - -#endif /* !_RF__RF_PARITYLOGGINGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_parityscan.c b/sys/dev/raidframe/rf_parityscan.c deleted file mode 100644 index 34834cb..0000000 --- a/sys/dev/raidframe/rf_parityscan.c +++ /dev/null @@ -1,445 +0,0 @@ -/* $NetBSD: rf_parityscan.c,v 1.9 2000/05/28 03:00:31 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * rf_parityscan.c -- misc utilities related to parity verification - * - *****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_kintf.h> - -/***************************************************************************************** - * - * walk through the entire arry and write new parity. - * This works by creating two DAGs, one to read a stripe of data and one to - * write new parity. The first is executed, the data is xored together, and - * then the second is executed. To avoid constantly building and tearing down - * the DAGs, we create them a priori and fill them in with the mapping - * information as we go along. - * - * there should never be more than one thread running this. - * - ****************************************************************************************/ - -int -rf_RewriteParity(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_AccessStripeMapHeader_t *asm_h; - int ret_val; - int rc; - RF_PhysDiskAddr_t pda; - RF_SectorNum_t i; - - if (raidPtr->Layout.map->faultsTolerated == 0) { - /* There isn't any parity. Call it "okay." */ - return (RF_PARITY_OKAY); - } - if (raidPtr->status[0] != rf_rs_optimal) { - /* - * We're in degraded mode. Don't try to verify parity now! - * XXX: this should be a "we don't want to", not a - * "we can't" error. - */ - return (RF_PARITY_COULD_NOT_VERIFY); - } - - ret_val = 0; - - pda.startSector = 0; - pda.numSector = raidPtr->Layout.sectorsPerStripeUnit; - rc = RF_PARITY_OKAY; - - for (i = 0; i < raidPtr->totalSectors && - rc <= RF_PARITY_CORRECTED; - i += layoutPtr->dataSectorsPerStripe) { - if (raidPtr->waitShutdown) { - /* Someone is pulling the plug on this set... - abort the re-write */ - return (1); - } - asm_h = rf_MapAccess(raidPtr, i, - layoutPtr->dataSectorsPerStripe, - NULL, RF_DONT_REMAP); - raidPtr->parity_rewrite_stripes_done = - i / layoutPtr->dataSectorsPerStripe ; - rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); - - switch (rc) { - case RF_PARITY_OKAY: - case RF_PARITY_CORRECTED: - break; - case RF_PARITY_BAD: - printf("Parity bad during correction\n"); - ret_val = 1; - break; - case RF_PARITY_COULD_NOT_CORRECT: - printf("Could not correct bad parity\n"); - ret_val = 1; - break; - case RF_PARITY_COULD_NOT_VERIFY: - printf("Could not verify parity\n"); - ret_val = 1; - break; - default: - printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); - ret_val = 1; - } - rf_FreeAccessStripeMap(asm_h); - } - return (ret_val); -} -/***************************************************************************************** - * - * verify that the parity in a particular stripe is correct. - * we validate only the range of parity defined by parityPDA, since - * this is all we have locked. The way we do this is to create an asm - * that maps the whole stripe and then range-restrict it to the parity - * region defined by the parityPDA. - * - ****************************************************************************************/ -int -rf_VerifyParity(raidPtr, aasm, correct_it, flags) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *aasm; - int correct_it; - RF_RaidAccessFlags_t flags; -{ - RF_PhysDiskAddr_t *parityPDA; - RF_AccessStripeMap_t *doasm; - RF_LayoutSW_t *lp; - int lrc, rc; - - lp = raidPtr->Layout.map; - if (lp->faultsTolerated == 0) { - /* - * There isn't any parity. Call it "okay." - */ - return (RF_PARITY_OKAY); - } - rc = RF_PARITY_OKAY; - if (lp->VerifyParity) { - for (doasm = aasm; doasm; doasm = doasm->next) { - for (parityPDA = doasm->parityInfo; parityPDA; - parityPDA = parityPDA->next) { - lrc = lp->VerifyParity(raidPtr, - doasm->raidAddress, - parityPDA, - correct_it, flags); - if (lrc > rc) { - /* see rf_parityscan.h for why this - * works */ - rc = lrc; - } - } - } - } else { - rc = RF_PARITY_COULD_NOT_VERIFY; - } - return (rc); -} - -int -rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, - raidAddr); - RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ - RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; - RF_AccessStripeMapHeader_t *asm_h; - RF_AccessStripeMap_t *asmap; - RF_AllocListElem_t *alloclist; - RF_PhysDiskAddr_t *pda; - char *pbuf, *buf, *end_p, *p; - int i, retcode; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, - raidAddr, - &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - RF_AccTraceEntry_t tracerec; - RF_MCPair_t *mcpair; - - retcode = RF_PARITY_OKAY; - - mcpair = rf_AllocMCPair(); - rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ - end_p = buf + bytesPerStripe; - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); - asmap = asm_h->stripeMap; - - for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) - goto out; /* no way to verify parity if disk is - * dead. return w/ good status */ - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - RF_ASSERT(!asmap->parityInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); - RF_ASSERT(asmap->parityInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) - goto out; - blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; - - /* fire off the DAG */ - bzero((char *) &tracerec, sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug) { - printf("Parity verify read dag:\n"); - rf_PrintDAGList(rd_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); - retcode = RF_PARITY_COULD_NOT_VERIFY; - goto out; - } - for (p = buf; p < end_p; p += numbytes) { - rf_bxor(p, pbuf, numbytes, NULL); - } - for (i = 0; i < numbytes; i++) { -#if 0 - if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) { - printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + i]); - } -#endif - if (pbuf[i] != buf[bytesPerStripe + i]) { - if (!correct_it) - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); - retcode = RF_PARITY_BAD; - break; - } - } - - if (retcode && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; - wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->parityInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); - retcode = RF_PARITY_COULD_NOT_CORRECT; - } - rf_FreeDAG(wr_dag_h); - if (retcode == RF_PARITY_BAD) - retcode = RF_PARITY_CORRECTED; - } -out: - rf_FreeAccessStripeMap(asm_h); - rf_FreeAllocList(alloclist); - rf_FreeDAG(rd_dag_h); - rf_FreeMCPair(mcpair); - return (retcode); -} - -int -rf_TryToRedirectPDA(raidPtr, pda, parity) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - int parity; -{ - if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) { - if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) { - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_RowCol_t or = pda->row, oc = pda->col; - RF_SectorNum_t os = pda->startSector; - if (parity) { - (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - if (rf_verifyParityDebug) - printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n", - or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); - } else { - (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - if (rf_verifyParityDebug) - printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n", - or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); - } - } else { - RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow; - RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol; - pda->row = spRow; - pda->col = spCol; - } - } - } - if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status)) - return (1); - return (0); -} -/***************************************************************************************** - * - * currently a stub. - * - * takes as input an ASM describing a write operation and containing one failure, and - * verifies that the parity was correctly updated to reflect the write. - * - * if it's a data unit that's failed, we read the other data units in the stripe and - * the parity unit, XOR them together, and verify that we get the data intended for - * the failed disk. Since it's easy, we also validate that the right data got written - * to the surviving data disks. - * - * If it's the parity that failed, there's really no validation we can do except the - * above verification that the right data got written to all disks. This is because - * the new data intended for the failed disk is supplied in the ASM, but this is of - * course not the case for the new parity. - * - ****************************************************************************************/ -int -rf_VerifyDegrModeWrite(raidPtr, asmh) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asmh; -{ - return (0); -} -/* creates a simple DAG with a header, a block-recon node at level 1, - * nNodes nodes at level 2, an unblock-recon node at level 3, and - * a terminator node at level 4. The stripe address field in - * the block and unblock nodes are not touched, nor are the pda - * fields in the second-level nodes, so they must be filled in later. - * - * commit point is established at unblock node - this means that any - * failure during dag execution causes the dag to fail - */ -RF_DagHeader_t * -rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority) - RF_Raid_t *raidPtr; - int nNodes; - int bytesPerSU; - char *databuf; - int (*doFunc) (RF_DagNode_t * node); - int (*undoFunc) (RF_DagNode_t * node); - char *name; /* node names at the second level */ - RF_AllocListElem_t *alloclist; - RF_RaidAccessFlags_t flags; - int priority; -{ - RF_DagHeader_t *dag_h; - RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode; - int i; - - /* create the nodes, the block & unblock nodes, and the terminator - * node */ - RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist); - blockNode = &nodes[nNodes]; - unblockNode = blockNode + 1; - termNode = unblockNode + 1; - - dag_h = rf_AllocDAGHeader(); - dag_h->raidPtr = (void *) raidPtr; - dag_h->allocList = NULL;/* we won't use this alloc list */ - dag_h->status = rf_enable; - dag_h->numSuccedents = 1; - dag_h->creator = "SimpleDAG"; - - /* this dag can not commit until the unblock node is reached errors - * prior to the commit point imply the dag has failed */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - - dag_h->succedents[0] = blockNode; - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); - unblockNode->succedents[0] = termNode; - for (i = 0; i < nNodes; i++) { - blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i]; - unblockNode->antType[i] = rf_control; - rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); - nodes[i].succedents[0] = unblockNode; - nodes[i].antecedents[0] = blockNode; - nodes[i].antType[0] = rf_control; - nodes[i].params[1].p = (databuf + (i * bytesPerSU)); - } - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - return (dag_h); -} diff --git a/sys/dev/raidframe/rf_parityscan.h b/sys/dev/raidframe/rf_parityscan.h deleted file mode 100644 index babca41..0000000 --- a/sys/dev/raidframe/rf_parityscan.h +++ /dev/null @@ -1,67 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_parityscan.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_PARITYSCAN_H_ -#define _RF__RF_PARITYSCAN_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> - -int rf_RewriteParity(RF_Raid_t * raidPtr); -int -rf_VerifyParityBasic(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int -rf_VerifyParity(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * stripeMap, - int correct_it, RF_RaidAccessFlags_t flags); -int rf_TryToRedirectPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, int parity); -int rf_VerifyDegrModeWrite(RF_Raid_t * raidPtr, RF_AccessStripeMapHeader_t * asmh); -RF_DagHeader_t * -rf_MakeSimpleDAG(RF_Raid_t * raidPtr, int nNodes, - int bytesPerSU, char *databuf, - int (*doFunc) (RF_DagNode_t *), - int (*undoFunc) (RF_DagNode_t *), - char *name, RF_AllocListElem_t * alloclist, - RF_RaidAccessFlags_t flags, int priority); - -#define RF_DO_CORRECT_PARITY 1 -#define RF_DONT_CORRECT_PARITY 0 - -/* - * Return vals for VerifyParity operation - * - * Ordering is important here. - */ -#define RF_PARITY_OKAY 0 /* or no parity information */ -#define RF_PARITY_CORRECTED 1 -#define RF_PARITY_BAD 2 -#define RF_PARITY_COULD_NOT_CORRECT 3 -#define RF_PARITY_COULD_NOT_VERIFY 4 - -#endif /* !_RF__RF_PARITYSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_pq.c b/sys/dev/raidframe/rf_pq.c deleted file mode 100644 index 98b53e8..0000000 --- a/sys/dev/raidframe/rf_pq.c +++ /dev/null @@ -1,928 +0,0 @@ -/* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Code for RAID level 6 (P + Q) disk array architecture. - */ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_pqdeg.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_pq.h> - -RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"}; -RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"}; - -int -rf_RegularONPFunc(node) - RF_DagNode_t *node; -{ - return (rf_RegularXorFunc(node)); -} -/* - same as simpleONQ func, but the coefficient is always 1 -*/ - -int -rf_SimpleONPFunc(node) - RF_DagNode_t *node; -{ - return (rf_SimpleXorFunc(node)); -} - -int -rf_RecoveryPFunc(node) - RF_DagNode_t *node; -{ - return (rf_RecoveryXorFunc(node)); -} - -int -rf_RegularPFunc(node) - RF_DagNode_t *node; -{ - return (rf_RegularXorFunc(node)); -} -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -static void -QDelta(char *dest, char *obuf, char *nbuf, unsigned length, - unsigned char coeff); -static void -rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, - unsigned length, unsigned coeff); - -RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"}; -RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"}; -RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"}; - -void -rf_PQDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - unsigned ndfail = asmap->numDataFailed; - unsigned npfail = asmap->numParityFailed; - unsigned ntfail = npfail + ndfail; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - if (ntfail > 2) { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } - /* ok, we can do this I/O */ - if (type == RF_IO_TYPE_READ) { - switch (ndfail) { - case 0: - /* fault free read */ - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ - break; - case 1: - /* lost a single data unit */ - /* two cases: (1) parity is not lost. do a normal raid - * 5 reconstruct read. (2) parity is lost. do a - * reconstruct read using "q". */ - if (ntfail == 2) { /* also lost redundancy */ - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG; - } else { - /* P and Q are ok. But is there a failure in - * some unaccessed data unit? */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG; - } - break; - case 2: - /* lost two data units */ - /* *infoFunc = PQOneTwo; */ - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; - break; - } - return; - } - /* a write */ - switch (ntfail) { - case 0: /* fault free */ - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - - *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG; - } else { - *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG; - } - break; - - case 1: /* single disk fault */ - if (npfail == 1) { - RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like - * normal mode raid5 - * write. */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG; - } else {/* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG; - } - } else { /* data missing. Do a P reconstruct write if - * only a single data unit is lost in the - * stripe, otherwise a PQ reconstruct write. */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG; - } - break; - - case 2: /* two disk faults */ - switch (npfail) { - case 2: /* both p and q dead */ - *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG; - break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) - *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG; - break; - case 0: /* double data loss */ - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; - break; - } - break; - - default: /* more than 2 disk faults */ - *createFunc = NULL; - RF_PANIC(); - } - return; -} -/* - Used as a stop gap info function -*/ -#if 0 -static void -PQOne(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; -{ - *nSucc = *nAnte = 1; -} - -static void -PQOneTwo(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; -{ - *nSucc = 1; - *nAnte = 2; -} -#endif - -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, - rf_RegularPQFunc, RF_FALSE); -} - -int -rf_RegularONQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf, *qpbuf; - char *obuf, *nbuf; - RF_PhysDiskAddr_t *old, *new; - unsigned long coeff; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - - RF_ETIMER_START(timer); - - d = (np - 3) / 4; - RF_ASSERT(4 * d + 3 == np); - qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; - nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; - RF_ASSERT(new->numSector == old->numSector); - RF_ASSERT(new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient - * to use for the multiply. */ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); - QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ - return (0); -} -/* - See the SimpleXORFunc for the difference between a simple and regular func. - These Q functions should be used for - - new q = Q(data,old data,old q) - - style updates and not for - - q = ( new data, new data, .... ) - - computations. - - The simple q takes 2(2d+1)+1 params, where d is the number - of stripes written. The order of params is - old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d - [2d] old q pda_0, old q buffer - [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d - raidPtr -*/ - -int -rf_SimpleONQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf; - char *obuf, *nbuf; - RF_PhysDiskAddr_t *old, *new; - unsigned long coeff; - - RF_ETIMER_START(timer); - - d = (np - 3) / 4; - RF_ASSERT(4 * d + 3 == np); - qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; - nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; - RF_ASSERT(new->numSector == old->numSector); - RF_ASSERT(new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient - * to use for the multiply. */ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ - return (0); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); -} - -static void RegularQSubr(RF_DagNode_t *node, char *qbuf); - -static void -RegularQSubr(node, qbuf) - RF_DagNode_t *node; - char *qbuf; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - - RF_ETIMER_START(timer); - - d = (np - 1) / 2; - RF_ASSERT(2 * d + 1 == np); - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of - * the stripe. so shift by their sector offset within the - * stripe unit */ - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} -/* - used in degraded writes. -*/ - -static void DegrQSubr(RF_DagNode_t *node); - -static void -DegrQSubr(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf = node->results[1]; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - unsigned fail_start; - int j; - - old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; - fail_start = old->startSector % secPerSU; - - RF_ETIMER_START(timer); - - d = (np - 2) / 2; - RF_ASSERT(2 * d + 2 == np); - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of - * the stripe. so shift by their sector offset within the - * stripe unit */ - j = old->startSector % secPerSU; - RF_ASSERT(j >= fail_start); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} -/* - Called by large write code to compute the new parity and the new q. - - structure of the params: - - pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol - raidPtr - - for a total of 2d+1 arguments. - The result buffers results[0], results[1] are the buffers for the p and q, - respectively. - - We compute Q first, then compute P. The P calculation may try to reuse - one of the input buffers for its output, so if we computed P first, we would - corrupt the input for the q calculation. -*/ - -int -rf_RegularPQFunc(node) - RF_DagNode_t *node; -{ - RegularQSubr(node, node->results[1]); - return (rf_RegularXorFunc(node)); /* does the wakeup */ -} - -int -rf_RegularQFunc(node) - RF_DagNode_t *node; -{ - /* Almost ... adjust Qsubr args */ - RegularQSubr(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ - return (0); -} -/* - Called by singly degraded write code to compute the new parity and the new q. - - structure of the params: - - pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d - failedPDA raidPtr - - for a total of 2d+2 arguments. - The result buffers results[0], results[1] are the buffers for the parity and q, - respectively. - - We compute Q first, then compute parity. The parity calculation may try to reuse - one of the input buffers for its output, so if we computed parity first, we would - corrupt the input for the q calculation. - - We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. -*/ - -void -rf_Degraded_100_PQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - - RF_ASSERT(np >= 2); - DegrQSubr(node); - rf_RecoveryXorFunc(node); -} - - -/* - The two below are used when reading a stripe with a single lost data unit. - The parameters are - - pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr - - and results[0] contains the data buffer. Which is originally zero-filled. - -*/ - -/* this Q func is used by the degraded-mode dag functions to recover lost data. - * the second-to-last parameter is the PDA for the failed portion of the access. - * the code here looks at this PDA and assumes that the xor target buffer is - * equal in size to the number of sectors in the failed PDA. It then uses - * the other PDAs in the parameter list to determine where within the target - * buffer the corresponding data should be xored. - * - * Recall the basic equation is - * - * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 - * - * so to recover data_j we need - * - * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256 - * - * So the coefficient for each buffer is (255 - data_col), and j should be initialized by - * copying Q into it. Then we need to do a table lookup to convert to solve - * data_j /= J - * - * - */ -int -rf_RecoveryQFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - int i; - RF_PhysDiskAddr_t *pda; - RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - unsigned long coeff; - - RF_ETIMER_START(timer); - /* start by copying Q into the buffer */ - bcopy(node->params[node->numParams - 3].p, node->results[0], - rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); - for (i = 0; i < node->numParams - 4; i += 2) { - RF_ASSERT(node->params[i + 1].p != node->results[0]); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i + 1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); - } - /* Do the nasty inversion now */ - coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol); - rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); - return (0); -} - -int -rf_RecoveryPQFunc(node) - RF_DagNode_t *node; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid); - return (1); -} -/* - Degraded write Q subroutine. - Used when P is dead. - Large-write style Q computation. - Parameters - - (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. - - We ignore failedPDA. - - This is a "simple style" recovery func. -*/ - -void -rf_PQ_DegradedWriteQFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf = node->results[0]; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - int fail_start, j; - - old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; - fail_start = old->startSector % secPerSU; - - RF_ETIMER_START(timer); - - d = (np - 2) / 2; - RF_ASSERT(2 * d + 2 == np); - - for (i = 0; i < d; i++) { - old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; - obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - j = old->startSector % secPerSU; - RF_ASSERT(j >= fail_start); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); -} - - - - -/* Q computations */ - -/* - coeff - colummn; - - compute dest ^= qfor[28-coeff][rn[coeff+1] a] - - on 5-bit basis; - length in bytes; -*/ - -void -rf_IncQ(dest, buf, length, coeff) - unsigned long *dest; - unsigned long *buf; - unsigned length; - unsigned coeff; -{ - unsigned long a, d, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[28 - coeff][0]); - unsigned r = rf_rn[coeff + 1]; - -#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) -#define INSERT(a,i) (a << (5L*i)) - - length /= 8; - /* 13 5 bit quants in a 64 bit word */ - while (length) { - a = *buf++; - d = *dest; - a1 = EXTRACT(a, 0) ^ r; - a2 = EXTRACT(a, 1) ^ r; - new = INSERT(a2, 1) | a1; - a1 = EXTRACT(a, 2) ^ r; - a2 = EXTRACT(a, 3) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 2) | INSERT(a2, 3); - a1 = EXTRACT(a, 4) ^ r; - a2 = EXTRACT(a, 5) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 4) | INSERT(a2, 5); - a1 = EXTRACT(a, 5) ^ r; - a2 = EXTRACT(a, 6) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 5) | INSERT(a2, 6); -#if RF_LONGSHIFT > 2 - a1 = EXTRACT(a, 7) ^ r; - a2 = EXTRACT(a, 8) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 7) | INSERT(a2, 8); - a1 = EXTRACT(a, 9) ^ r; - a2 = EXTRACT(a, 10) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 9) | INSERT(a2, 10); - a1 = EXTRACT(a, 11) ^ r; - a2 = EXTRACT(a, 12) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 11) | INSERT(a2, 12); -#endif /* RF_LONGSHIFT > 2 */ - d ^= new; - *dest++ = d; - length--; - } -} -/* - compute - - dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] - - on a five bit basis. - optimization: compute old ^ new on 64 bit basis. - - length in bytes. -*/ - -static void -QDelta( - char *dest, - char *obuf, - char *nbuf, - unsigned length, - unsigned char coeff) -{ - unsigned long a, d, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[28 - coeff][0]); - unsigned int r = rf_rn[coeff + 1]; - - r = a1 = a2 = new = d = a = 0; /* XXX for now... */ - q = NULL; /* XXX for now */ - -#ifdef _KERNEL - /* PQ in kernel currently not supported because the encoding/decoding - * table is not present */ - bzero(dest, length); -#else /* KERNEL */ - /* this code probably doesn't work and should be rewritten -wvcii */ - /* 13 5 bit quants in a 64 bit word */ - length /= 8; - while (length) { - a = *obuf++; /* XXX need to reorg to avoid cache conflicts */ - a ^= *nbuf++; - d = *dest; - a1 = EXTRACT(a, 0) ^ r; - a2 = EXTRACT(a, 1) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = INSERT(a2, 1) | a1; - a1 = EXTRACT(a, 2) ^ r; - a2 = EXTRACT(a, 3) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 2) | INSERT(a2, 3); - a1 = EXTRACT(a, 4) ^ r; - a2 = EXTRACT(a, 5) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 4) | INSERT(a2, 5); - a1 = EXTRACT(a, 5) ^ r; - a2 = EXTRACT(a, 6) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 5) | INSERT(a2, 6); -#if RF_LONGSHIFT > 2 - a1 = EXTRACT(a, 7) ^ r; - a2 = EXTRACT(a, 8) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 7) | INSERT(a2, 8); - a1 = EXTRACT(a, 9) ^ r; - a2 = EXTRACT(a, 10) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 9) | INSERT(a2, 10); - a1 = EXTRACT(a, 11) ^ r; - a2 = EXTRACT(a, 12) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1, 11) | INSERT(a2, 12); -#endif /* RF_LONGSHIFT > 2 */ - d ^= new; - *dest++ = d; - length--; - } -#endif /* _KERNEL */ -} -/* - recover columns a and b from the given p and q into - bufs abuf and bbuf. All bufs are word aligned. - Length is in bytes. -*/ - - -/* - * XXX - * - * Everything about this seems wrong. - */ -void -rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) - unsigned long *pbuf; - unsigned long *qbuf; - unsigned long *abuf; - unsigned long *bbuf; - unsigned length; - unsigned coeff_a; - unsigned coeff_b; -{ - unsigned long p, q, a, a0, a1; - int col = (29 * coeff_a) + coeff_b; - unsigned char *q0 = &(rf_qinv[col][0]); - - length /= 8; - while (length) { - p = *pbuf++; - q = *qbuf++; - a0 = EXTRACT(p, 0); - a1 = EXTRACT(q, 0); - a = q0[a0 << 5 | a1]; -#define MF(i) \ - a0 = EXTRACT(p,i); \ - a1 = EXTRACT(q,i); \ - a = a | INSERT(q0[a0<<5 | a1],i) - - MF(1); - MF(2); - MF(3); - MF(4); - MF(5); - MF(6); -#if 0 - MF(7); - MF(8); - MF(9); - MF(10); - MF(11); - MF(12); -#endif /* 0 */ - *abuf++ = a; - *bbuf++ = a ^ p; - length--; - } -} -/* - Lost parity and a data column. Recover that data column. - Assume col coeff is lost. Let q the contents of Q after - all surviving data columns have been q-xored out of it. - Then we have the equation - - q[28-coeff][a_i ^ r_i+1] = q - - but q is cyclic with period 31. - So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = - q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . - - so a_i = r_{coeff+1} ^ q[3+coeff][q] - - The routine is passed q buffer and the buffer - the data is to be recoverd into. They can be the same. -*/ - - - -static void -rf_InvertQ( - unsigned long *qbuf, - unsigned long *abuf, - unsigned length, - unsigned coeff) -{ - unsigned long a, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[3 + coeff][0]); - unsigned r = rf_rn[coeff + 1]; - - /* 13 5 bit quants in a 64 bit word */ - length /= 8; - while (length) { - a = *qbuf++; - a1 = EXTRACT(a, 0); - a2 = EXTRACT(a, 1); - a1 = r ^ q[a1]; - a2 = r ^ q[a2]; - new = INSERT(a2, 1) | a1; -#define M(i,j) \ - a1 = EXTRACT(a,i); \ - a2 = EXTRACT(a,j); \ - a1 = r ^ q[a1]; \ - a2 = r ^ q[a2]; \ - new = new | INSERT(a1,i) | INSERT(a2,j) - - M(2, 3); - M(4, 5); - M(5, 6); -#if RF_LONGSHIFT > 2 - M(7, 8); - M(9, 10); - M(11, 12); -#endif /* RF_LONGSHIFT > 2 */ - *abuf++ = new; - length--; - } -} -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pq.h b/sys/dev/raidframe/rf_pq.h deleted file mode 100644 index 9a2ce23..0000000 --- a/sys/dev/raidframe/rf_pq.h +++ /dev/null @@ -1,75 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_pq.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * rf_pq.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_PQ_H_ -#define _RF__RF_PQ_H_ - -#include <dev/raidframe/rf_archs.h> - -extern RF_RedFuncs_t rf_pFuncs; -extern RF_RedFuncs_t rf_pRecoveryFuncs; - -int rf_RegularONPFunc(RF_DagNode_t * node); -int rf_SimpleONPFunc(RF_DagNode_t * node); -int rf_RecoveryPFunc(RF_DagNode_t * node); -int rf_RegularPFunc(RF_DagNode_t * node); - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -extern RF_RedFuncs_t rf_qFuncs; -extern RF_RedFuncs_t rf_qRecoveryFuncs; -extern RF_RedFuncs_t rf_pqRecoveryFuncs; - -void -rf_PQDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG); -int rf_RegularONQFunc(RF_DagNode_t * node); -int rf_SimpleONQFunc(RF_DagNode_t * node); -RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG); -int rf_RegularPQFunc(RF_DagNode_t * node); -int rf_RegularQFunc(RF_DagNode_t * node); -void rf_Degraded_100_PQFunc(RF_DagNode_t * node); -int rf_RecoveryQFunc(RF_DagNode_t * node); -int rf_RecoveryPQFunc(RF_DagNode_t * node); -void rf_PQ_DegradedWriteQFunc(RF_DagNode_t * node); -void -rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, - unsigned coeff); -void -rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, - unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b); - -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ - -#endif /* !_RF__RF_PQ_H_ */ diff --git a/sys/dev/raidframe/rf_pqdeg.c b/sys/dev/raidframe/rf_pqdeg.c deleted file mode 100644 index 0d3356c..0000000 --- a/sys/dev/raidframe/rf_pqdeg.c +++ /dev/null @@ -1,219 +0,0 @@ -/* $NetBSD: rf_pqdeg.c,v 1.5 2000/01/07 03:41:04 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_archs.h> - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_pqdeg.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_pqdegdags.h> -#include <dev/raidframe/rf_pq.h> - -/* - Degraded mode dag functions for P+Q calculations. - - The following nomenclature is used. - - PQ_<D><P><Q>_Create{Large,Small}<Write|Read>DAG - - where <D><P><Q> are single digits representing the number of failed - data units <D> (0,1,2), parity units <P> (0,1), and Q units <Q>, effecting - the I/O. The reads have only PQ_<D><P><Q>_CreateReadDAG variants, while - the single fault writes have both large and small write versions. (Single fault - PQ is equivalent to normal mode raid 5 in many aspects. - - Some versions degenerate into the same case, and are grouped together below. -*/ - -/* Reads, single failure - - we have parity, so we can do a raid 5 - reconstruct read. -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); -} -/* Reads double failure */ - -/* - Q is lost, but not parity - so we can a raid 5 reconstruct read. -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG) -{ - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); -} -/* - parity is lost, so we need to - do a reconstruct read and recompute - the data with Q. -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_qRecoveryFuncs); -} -/* - Two data units are dead in this stripe, so we will need read - both P and Q to reconstruct the data. Note that only - one data unit we are reading may actually be missing. -*/ -RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG) -{ - rf_PQ_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG) -{ - rf_CreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); -} -/* Writes, single failure */ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != - raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, - flags, allocList, 2, - (int (*) (RF_DagNode_t *)) rf_Degraded_100_PQFunc, - RF_FALSE); -} -/* Dead P - act like a RAID 5 small write with parity = Q */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, &rf_qFuncs, NULL); -} -/* Dead Q - act like a RAID 5 small write */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG) -{ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, &rf_pFuncs, NULL); -} -/* Dead P - act like a RAID 5 large write but for Q */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, rf_RegularQFunc, RF_FALSE); -} -/* Dead Q - act like a RAID 5 large write */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG) -{ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, rf_RegularPFunc, RF_FALSE); -} - - -/* - * writes, double failure - */ - -/* - * Lost P & Q - do a nonredundant write - */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG) -{ - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); -} -/* - In the two cases below, - A nasty case arises when the write a (strict) portion of a failed stripe unit - and parts of another su. For now, we do not support this. -*/ - -/* - Lost Data and P - do a Q write. -*/ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG) -{ - RF_PhysDiskAddr_t *temp; - - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { - RF_PANIC(); - } - /* swap P and Q to fake out parity code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, - (int (*) (RF_DagNode_t *)) rf_PQ_DegradedWriteQFunc, - RF_FALSE); - /* is the regular Q func the right one to call? */ -} -/* - Lost Data and Q - do degraded mode P write -*/ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG) -{ - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, rf_RecoveryXorFunc, RF_FALSE); -} -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdeg.h b/sys/dev/raidframe/rf_pqdeg.h deleted file mode 100644 index 83371e6..0000000 --- a/sys/dev/raidframe/rf_pqdeg.h +++ /dev/null @@ -1,75 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_pqdeg.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_PQDEG_H_ -#define _RF__RF_PQDEG_H_ - -#include <dev/raidframe/rf_types.h> - -#if RF_UTILITY == 0 -#include <dev/raidframe/rf_dag.h> - -/* extern decl's of the failure mode PQ functions. - * See pddeg.c for nomenclature discussion. - */ - -/* reads, single failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG); -/* reads, two failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG); - -/* writes, single failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG); - -/* writes, double failure */ -RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ - -typedef RF_uint32 RF_ua32_t[32]; -typedef RF_uint8 RF_ua1024_t[1024]; - -extern RF_ua32_t rf_rn; -extern RF_ua32_t rf_qfor[32]; -#ifndef _KERNEL /* we don't support PQ in the kernel yet, so - * don't link in this monster table */ -extern RF_ua1024_t rf_qinv[29 * 29]; -#else /* !_KERNEL */ -extern RF_ua1024_t rf_qinv[1]; -#endif /* !_KERNEL */ - -#endif /* !_RF__RF_PQDEG_H_ */ diff --git a/sys/dev/raidframe/rf_pqdegdags.c b/sys/dev/raidframe/rf_pqdegdags.c deleted file mode 100644 index 3606005..0000000 --- a/sys/dev/raidframe/rf_pqdegdags.c +++ /dev/null @@ -1,432 +0,0 @@ -/* $NetBSD: rf_pqdegdags.c,v 1.5 1999/08/15 02:36:40 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_pqdegdags.c - * Degraded mode dags for double fault cases. -*/ - - -#include <dev/raidframe/rf_archs.h> - -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_pqdegdags.h> -#include <dev/raidframe/rf_pq.h> - -static void -applyPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, RF_PhysDiskAddr_t * ppda, - RF_PhysDiskAddr_t * qpda, void *bp); - -/* - Two data drives have failed, and we are doing a read that covers one of them. - We may also be reading some of the surviving drives. - - - ***************************************************************************************** - * - * creates a DAG to perform a degraded-mode read of data within one stripe. - * This DAG is as follows: - * - * Hdr - * | - * Block - * / / \ \ \ \ - * Rud ... Rud Rrd ... Rrd Rp Rq - * | \ | \ | \ | \ | \ | \ - * - * | | - * Unblock X - * \ / - * ------ T ------ - * - * Each R node is a successor of the L node - * One successor arc from each R node goes to U, and the other to X - * There is one Rud for each chunk of surviving user data requested by the user, - * and one Rrd for each chunk of surviving user data _not_ being read by the user - * R = read, ud = user data, rd = recovery (surviving) data, p = P data, q = Qdata - * X = pq recovery node, T = terminate - * - * The block & unblock nodes are leftovers from a previous version. They - * do nothing, but I haven't deleted them because it would be a tremendous - * effort to put them back in. - * - * Note: The target buffer for the XOR node is set to the actual user buffer where the - * failed data is supposed to end up. This buffer is zero'd by the code here. Thus, - * if you create a degraded read dag, use it, and then re-use, you have to be sure to - * zero the target buffer prior to the re-use. - * - * Every buffer read is passed to the pq recovery node, whose job it is to sort out whats - * needs and what's not. - ****************************************************************************************/ -/* init a disk node with 2 successors and one predecessor */ -#define INIT_DISK_NODE(node,name) \ -rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ -(node)->succedents[0] = unblockNode; \ -(node)->succedents[1] = recoveryNode; \ -(node)->antecedents[0] = blockNode; \ -(node)->antType[0] = rf_control - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -#define DISK_NODE_PDA(node) ((node)->params[0].p) - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead) -{ - rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, - "Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc); -} - -static void -applyPDA(raidPtr, pda, ppda, qpda, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - RF_PhysDiskAddr_t *ppda; - RF_PhysDiskAddr_t *qpda; - void *bp; -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector); - RF_SectorCount_t s0len = ppda->numSector, len; - RF_SectorNum_t suoffset; - unsigned coeff; - char *pbuf = ppda->bufPtr; - char *qbuf = qpda->bufPtr; - char *buf; - int delta; - - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - len = pda->numSector; - /* see if pda intersects a recovery pda */ - if ((suoffset < s0off + s0len) && (suoffset + len > s0off)) { - buf = pda->bufPtr; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); - coeff = (coeff % raidPtr->Layout.numDataCol); - - if (suoffset < s0off) { - delta = s0off - suoffset; - buf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); - suoffset = s0off; - len -= delta; - } - if (suoffset > s0off) { - delta = suoffset - s0off; - pbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); - qbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); - } - if ((suoffset + len) > (s0len + s0off)) - len = s0len + s0off - suoffset; - - /* src, dest, len */ - rf_bxor(buf, pbuf, rf_RaidAddressToByte(raidPtr, len), bp); - - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf, rf_RaidAddressToByte(raidPtr, len), coeff); - } -} -/* - Recover data in the case of a double failure. There can be two - result buffers, one for each chunk of data trying to be recovered. - The params are pda's that have not been range restricted or otherwise - politely massaged - this should be done here. The last params are the - pdas of P and Q, followed by the raidPtr. The list can look like - - pda, pda, ... , p pda, q pda, raidptr, asm - - or - - pda, pda, ... , p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm - - depending on wether two chunks of recovery data were required. - - The second condition only arises if there are two failed buffers - whose lengths do not add up a stripe unit. -*/ - - -int -rf_PQDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int d, i; - unsigned coeff; - RF_RaidAddr_t sosAddr, suoffset; - RF_SectorCount_t len, secPerSU = layoutPtr->sectorsPerStripeUnit; - int two = 0; - RF_PhysDiskAddr_t *ppda, *ppda2, *qpda, *qpda2, *pda, npda; - char *buf; - int numDataCol = layoutPtr->numDataCol; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ETIMER_START(timer); - - if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { - RF_ASSERT(0); - ppda = node->params[np - 6].p; - ppda2 = node->params[np - 5].p; - qpda = node->params[np - 4].p; - qpda2 = node->params[np - 3].p; - d = (np - 6); - two = 1; - } else { - ppda = node->params[np - 4].p; - qpda = node->params[np - 3].p; - d = (np - 4); - } - - for (i = 0; i < d; i++) { - pda = node->params[i].p; - buf = pda->bufPtr; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - len = pda->numSector; - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* see if pda intersects a recovery pda */ - applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); - if (two) - applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); - } - - /* ok, we got the parity back to the point where we can recover. We - * now need to determine the coeff of the columns that need to be - * recovered. We can also only need to recover a single stripe unit. */ - - if (asmap->failedPDAs[1] == NULL) { /* only a single stripe unit - * to recover. */ - pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != coeff) - break; - } - RF_ASSERT(i < numDataCol); - RF_ASSERT(two == 0); - /* recover the data. Since we need only want to recover one - * column, we overwrite the parity with the other one. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) pda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) pda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); - } else - RF_PANIC(); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); - return (0); -} - -int -rf_PQWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; -{ - /* The situation: - * - * We are doing a write that hits only one failed data unit. The other - * failed data unit is not being overwritten, so we need to generate - * it. - * - * For the moment, we assume all the nonfailed data being written is in - * the shadow of the failed data unit. (i.e,, either a single data - * unit write or the entire failed stripe unit is being overwritten. ) - * - * Recovery strategy: apply the recovery data to the parity and q. Use P - * & Q to recover the second failed data unit in P. Zero fill Q, then - * apply the recovered data to p. Then apply the data being written to - * the failed drive. Then walk through the surviving drives, applying - * new data when it exists, othewise the recovery data. Quite a mess. - * - * - * The params - * - * read pda0, read pda1, ... read pda (numDataCol-3), write pda0, ... , - * write pda (numStripeUnitAccess - numDataFailed), failed pda, - * raidPtr, asmap */ - - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i; - RF_RaidAddr_t sosAddr; - unsigned coeff; - RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda; - int numDataCol = layoutPtr->numDataCol; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ASSERT(node->numResults == 2); - RF_ASSERT(asmap->failedPDAs[1] == NULL); - RF_ETIMER_START(timer); - ppda = node->results[0]; - qpda = node->results[1]; - /* apply the recovery data */ - for (i = 0; i < numDataCol - 2; i++) - applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); - - /* determine the other failed data unit */ - pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - for (i = 0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != coeff) - break; - } - RF_ASSERT(i < numDataCol); - /* recover the data. The column we want to recover we write over the - * parity. The column we don't care about we dump in q. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); - - /* OK. The valid data is in P. Zero fill Q, then inc it into it. */ - bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector)); - rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), i); - - /* now apply all the write data to the buffer */ - /* single stripe unit write case: the failed data is only thing we are - * writing. */ - RF_ASSERT(asmap->numStripeUnitsAccessed == 1); - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) asmap->failedPDAs[0]->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff); - rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr, rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp); - - /* now apply all the recovery data */ - for (i = 0; i < numDataCol - 2; i++) - applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) - tracerec->q_us += RF_ETIMER_VAL_US(timer); - - rf_GenericWakeupFunc(node, 0); - return (0); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite) -{ - RF_PANIC(); -} -/* - Two lost data unit write case. - - There are really two cases here: - - (1) The write completely covers the two lost data units. - In that case, a reconstruct write that doesn't write the - failed data units will do the correct thing. So in this case, - the dag looks like - - full stripe read of surviving data units (not being overwriten) - write new data (ignoring failed units) compute P&Q - write P&Q - - - (2) The write does not completely cover both failed data units - (but touches at least one of them). Then we need to do the - equivalent of a reconstruct read to recover the missing data - unit from the other stripe. - - For any data we are writing that is not in the "shadow" - of the failed units, we need to do a four cycle update. - PANIC on this case. for now - -*/ - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_SectorCount_t sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - int sum; - int nf = asmap->numDataFailed; - - sum = asmap->failedPDAs[0]->numSector; - if (nf == 2) - sum += asmap->failedPDAs[1]->numSector; - - if ((nf == 2) && (sum == (2 * sectorsPerSU))) { - /* large write case */ - rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList); - return; - } - if ((nf == asmap->numStripeUnitsAccessed) || (sum >= sectorsPerSU)) { - /* small write case, no user data not in shadow */ - rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList); - return; - } - RF_PANIC(); -} -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite) -{ - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc); -} -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdegdags.h b/sys/dev/raidframe/rf_pqdegdags.h deleted file mode 100644 index 11ce820..0000000 --- a/sys/dev/raidframe/rf_pqdegdags.h +++ /dev/null @@ -1,49 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_pqdegdags.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * rf_pqdegdags.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Daniel Stodolsky - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * rf_pqdegdags.c - * Degraded mode dags for double fault cases. - */ - -#ifndef _RF__RF_PQDEGDAGS_H_ -#define _RF__RF_PQDEGDAGS_H_ - -#include <dev/raidframe/rf_dag.h> - -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead); -int rf_PQDoubleRecoveryFunc(RF_DagNode_t * node); -int rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t * node); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite); -RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); - -#endif /* !_RF__RF_PQDEGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_psstatus.c b/sys/dev/raidframe/rf_psstatus.c deleted file mode 100644 index a6968cf..0000000 --- a/sys/dev/raidframe/rf_psstatus.c +++ /dev/null @@ -1,378 +0,0 @@ -/* $NetBSD: rf_psstatus.c,v 1.5 2000/01/08 22:57:31 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * psstatus.c - * - * The reconstruction code maintains a bunch of status related to the parity - * stripes that are currently under reconstruction. This header file defines - * the status structures. - * - *****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_psstatus.h> -#include <dev/raidframe/rf_shutdown.h> - -#define Dprintf1(s,a) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) - -static void -RealPrintPSStatusTable(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable); - -#define RF_MAX_FREE_PSS 32 -#define RF_PSS_INC 8 -#define RF_PSS_INITIAL 4 - -static int init_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); -static void clean_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); -static void rf_ShutdownPSStatus(void *); - -static int -init_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; -{ - RF_Calloc(p->issued, raidPtr->numCol, sizeof(char), (char *)); - if (p->issued == NULL) - return (ENOMEM); - return (0); -} - -static void -clean_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; -{ - RF_Free(p->issued, raidPtr->numCol * sizeof(char)); -} - -static void -rf_ShutdownPSStatus(arg) - void *arg; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) arg; - - RF_FREELIST_DESTROY_CLEAN_ARG(raidPtr->pss_freelist, next, (RF_ReconParityStripeStatus_t *), clean_pss, raidPtr); -} - -int -rf_ConfigurePSStatus( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - raidPtr->pssTableSize = RF_PSS_DEFAULT_TABLESIZE; - RF_FREELIST_CREATE(raidPtr->pss_freelist, RF_MAX_FREE_PSS, - RF_PSS_INC, sizeof(RF_ReconParityStripeStatus_t)); - if (raidPtr->pss_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownPSStatus, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownPSStatus(raidPtr); - return (rc); - } - RF_FREELIST_PRIME_INIT_ARG(raidPtr->pss_freelist, RF_PSS_INITIAL, next, - (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); - return (0); -} -/***************************************************************************************** - * sets up the pss table - * We pre-allocate a bunch of entries to avoid as much as possible having to - * malloc up hash chain entries. - ****************************************************************************************/ -RF_PSStatusHeader_t * -rf_MakeParityStripeStatusTable(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_PSStatusHeader_t *pssTable; - int i, j, rc; - - RF_Calloc(pssTable, raidPtr->pssTableSize, sizeof(RF_PSStatusHeader_t), (RF_PSStatusHeader_t *)); - for (i = 0; i < raidPtr->pssTableSize; i++) { - rc = rf_mutex_init(&pssTable[i].mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - /* fail and deallocate */ - for (j = 0; j < i; j++) { - rf_mutex_destroy(&pssTable[i].mutex); - } - RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); - return (NULL); - } - } - return (pssTable); -} - -void -rf_FreeParityStripeStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; -{ - int i; - - if (rf_pssDebug) - RealPrintPSStatusTable(raidPtr, pssTable); - for (i = 0; i < raidPtr->pssTableSize; i++) { - if (pssTable[i].chain) { - printf("ERROR: pss hash chain not null at recon shutdown\n"); - } - rf_mutex_destroy(&pssTable[i].mutex); - } - RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); -} - - -/* looks up the status structure for a parity stripe. - * if the create_flag is on, creates and returns the status structure it it doesn't exist - * otherwise returns NULL if the status structure does not exist - * - * ASSUMES THE PSS DESCRIPTOR IS LOCKED UPON ENTRY - */ -RF_ReconParityStripeStatus_t * -rf_LookupRUStatus( - RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable, - RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, - RF_PSSFlags_t flags, /* whether or not to create it if it doesn't - * exist + what flags to set initially */ - int *created) -{ - RF_PSStatusHeader_t *hdr = &pssTable[RF_HASH_PSID(raidPtr, psID)]; - RF_ReconParityStripeStatus_t *p, *pssPtr = hdr->chain; - - *created = 0; - for (p = pssPtr; p; p = p->next) { - if (p->parityStripeID == psID && p->which_ru == which_ru) - break; - } - - if (!p && (flags & RF_PSS_CREATE)) { - Dprintf2("PSS: creating pss for psid %ld ru %d\n", psID, which_ru); - p = rf_AllocPSStatus(raidPtr); - p->next = hdr->chain; - hdr->chain = p; - - p->parityStripeID = psID; - p->which_ru = which_ru; - p->flags = flags; - p->rbuf = NULL; - p->writeRbuf = NULL; - p->blockCount = 0; - p->procWaitList = NULL; - p->blockWaitList = NULL; - p->bufWaitList = NULL; - *created = 1; - } else - if (p) { /* we didn't create, but we want to specify - * some new status */ - p->flags |= flags; /* add in whatever flags we're - * specifying */ - } - if (p && (flags & RF_PSS_RECON_BLOCKED)) { - p->blockCount++;/* if we're asking to block recon, bump the - * count */ - Dprintf3("raid%d: Blocked recon on psid %ld. count now %d\n", - raidPtr->raidid, psID, p->blockCount); - } - return (p); -} -/* deletes an entry from the parity stripe status table. typically used - * when an entry has been allocated solely to block reconstruction, and - * no recon was requested while recon was blocked. Assumes the hash - * chain is ALREADY LOCKED. - */ -void -rf_PSStatusDelete(raidPtr, pssTable, pssPtr) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; - RF_ReconParityStripeStatus_t *pssPtr; -{ - RF_PSStatusHeader_t *hdr = &(pssTable[RF_HASH_PSID(raidPtr, pssPtr->parityStripeID)]); - RF_ReconParityStripeStatus_t *p = hdr->chain, *pt = NULL; - - while (p) { - if (p == pssPtr) { - if (pt) - pt->next = p->next; - else - hdr->chain = p->next; - p->next = NULL; - rf_FreePSStatus(raidPtr, p); - return; - } - pt = p; - p = p->next; - } - RF_ASSERT(0); /* we must find it here */ -} -/* deletes an entry from the ps status table after reconstruction has completed */ -void -rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psid; -{ - RF_PSStatusHeader_t *hdr = &(raidPtr->reconControl[row]->pssTable[RF_HASH_PSID(raidPtr, psid)]); - RF_ReconParityStripeStatus_t *p, *pt; - RF_CallbackDesc_t *cb, *cb1; - - RF_LOCK_MUTEX(hdr->mutex); - for (pt = NULL, p = hdr->chain; p; pt = p, p = p->next) { - if ((p->parityStripeID == psid) && (p->which_ru == which_ru)) - break; - } - if (p == NULL) { - rf_PrintPSStatusTable(raidPtr, row); - } - RF_ASSERT(p); /* it must be there */ - - Dprintf2("PSS: deleting pss for psid %ld ru %d\n", psid, which_ru); - - /* delete this entry from the hash chain */ - if (pt) - pt->next = p->next; - else - hdr->chain = p->next; - p->next = NULL; - - RF_UNLOCK_MUTEX(hdr->mutex); - - /* wakup anyone waiting on the parity stripe ID */ - cb = p->procWaitList; - p->procWaitList = NULL; - while (cb) { - Dprintf1("Waking up access waiting on parity stripe ID %ld\n", p->parityStripeID); - cb1 = cb->next; - (cb->callbackFunc) (cb->callbackArg); - - /* THIS IS WHAT THE ORIGINAL CODE HAD... the extra 0 is bogus, - * IMHO */ - /* (cb->callbackFunc)(cb->callbackArg, 0); */ - rf_FreeCallbackDesc(cb); - cb = cb1; - } - - rf_FreePSStatus(raidPtr, p); -} - -RF_ReconParityStripeStatus_t * -rf_AllocPSStatus(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_ReconParityStripeStatus_t *p; - - RF_FREELIST_GET_INIT_ARG(raidPtr->pss_freelist, p, next, (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); - if (p) { - bzero(p->issued, raidPtr->numCol); - } - p->next = NULL; - /* no need to initialize here b/c the only place we're called from is - * the above Lookup */ - return (p); -} - -void -rf_FreePSStatus(raidPtr, p) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *p; -{ - RF_ASSERT(p->procWaitList == NULL); - RF_ASSERT(p->blockWaitList == NULL); - RF_ASSERT(p->bufWaitList == NULL); - - RF_FREELIST_FREE_CLEAN_ARG(raidPtr->pss_freelist, p, next, clean_pss, raidPtr); -} - -static void -RealPrintPSStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; -{ - int i, j, procsWaiting, blocksWaiting, bufsWaiting; - RF_ReconParityStripeStatus_t *p; - RF_CallbackDesc_t *cb; - - printf("\nParity Stripe Status Table\n"); - for (i = 0; i < raidPtr->pssTableSize; i++) { - for (p = pssTable[i].chain; p; p = p->next) { - procsWaiting = blocksWaiting = bufsWaiting = 0; - for (cb = p->procWaitList; cb; cb = cb->next) - procsWaiting++; - for (cb = p->blockWaitList; cb; cb = cb->next) - blocksWaiting++; - for (cb = p->bufWaitList; cb; cb = cb->next) - bufsWaiting++; - printf("PSID %ld RU %d : blockCount %d %d/%d/%d proc/block/buf waiting, issued ", - (long) p->parityStripeID, p->which_ru, p->blockCount, procsWaiting, blocksWaiting, bufsWaiting); - for (j = 0; j < raidPtr->numCol; j++) - printf("%c", (p->issued[j]) ? '1' : '0'); - if (!p->flags) - printf(" flags: (none)"); - else { - if (p->flags & RF_PSS_UNDER_RECON) - printf(" under-recon"); - if (p->flags & RF_PSS_FORCED_ON_WRITE) - printf(" forced-w"); - if (p->flags & RF_PSS_FORCED_ON_READ) - printf(" forced-r"); - if (p->flags & RF_PSS_RECON_BLOCKED) - printf(" blocked"); - if (p->flags & RF_PSS_BUFFERWAIT) - printf(" bufwait"); - } - printf("\n"); - } - } -} - -void -rf_PrintPSStatusTable(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; -{ - RF_PSStatusHeader_t *pssTable = raidPtr->reconControl[row]->pssTable; - RealPrintPSStatusTable(raidPtr, pssTable); -} diff --git a/sys/dev/raidframe/rf_psstatus.h b/sys/dev/raidframe/rf_psstatus.h deleted file mode 100644 index c836d49..0000000 --- a/sys/dev/raidframe/rf_psstatus.h +++ /dev/null @@ -1,132 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_psstatus.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * psstatus.h - * - * The reconstruction code maintains a bunch of status related to the parity - * stripes that are currently under reconstruction. This header file defines - * the status structures. - * - *****************************************************************************/ - -#ifndef _RF__RF_PSSTATUS_H_ -#define _RF__RF_PSSTATUS_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_callback.h> - -#define RF_PS_MAX_BUFS 10 /* max number of bufs we'll accumulate before - * we do an XOR */ - -#define RF_PSS_DEFAULT_TABLESIZE 200 - -/* - * Macros to acquire/release the mutex lock on a parity stripe status - * descriptor. Note that we use just one lock for the whole hash chain. - */ -#define RF_HASH_PSID(_raid_,_psid_) ( (_psid_) % ((_raid_)->pssTableSize) ) /* simple hash function */ -#define RF_LOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ - RF_LOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) -#define RF_UNLOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ - RF_UNLOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) - -struct RF_ReconParityStripeStatus_s { - RF_StripeNum_t parityStripeID; /* the parity stripe ID */ - RF_ReconUnitNum_t which_ru; /* which reconstruction unit with the - * indicated parity stripe */ - RF_PSSFlags_t flags; /* flags indicating various conditions */ - void *rbuf; /* this is the accumulating xor sum */ - void *writeRbuf; /* DEBUG ONLY: a pointer to the rbuf after it - * has filled & been sent to disk */ - void *rbufsForXor[RF_PS_MAX_BUFS]; /* these are buffers still to - * be xored into the - * accumulating sum */ - int xorBufCount; /* num buffers waiting to be xored */ - int blockCount; /* count of # proc that have blocked recon on - * this parity stripe */ - char *issued; /* issued[i]==1 <=> column i has already - * issued a read request for the indicated RU */ - RF_CallbackDesc_t *procWaitList; /* list of user procs waiting - * for recon to be done */ - RF_CallbackDesc_t *blockWaitList; /* list of disks blocked - * waiting for user write to - * complete */ - RF_CallbackDesc_t *bufWaitList; /* list of disks blocked waiting to - * acquire a buffer for this RU */ - RF_ReconParityStripeStatus_t *next; -}; - -struct RF_PSStatusHeader_s { - RF_DECLARE_MUTEX(mutex) /* mutex for this hash chain */ - RF_ReconParityStripeStatus_t *chain; /* the hash chain */ -}; -/* masks for the "flags" field above */ -#define RF_PSS_NONE 0x00000000 /* no flags */ -#define RF_PSS_UNDER_RECON 0x00000001 /* this parity stripe is - * currently under - * reconstruction */ -#define RF_PSS_FORCED_ON_WRITE 0x00000002 /* indicates a recon was - * forced due to a user-write - * operation */ -#define RF_PSS_FORCED_ON_READ 0x00000004 /* ditto for read, but not - * currently implemented */ -#define RF_PSS_RECON_BLOCKED 0x00000008 /* reconstruction is currently - * blocked due to a pending - * user I/O */ -#define RF_PSS_CREATE 0x00000010 /* tells LookupRUStatus to - * create the entry */ -#define RF_PSS_BUFFERWAIT 0x00000020 /* someone is waiting for a - * buffer for this RU */ - -int -rf_ConfigurePSStatus(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); - -RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(RF_Raid_t * raidPtr); -void -rf_FreeParityStripeStatusTable(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable); -RF_ReconParityStripeStatus_t * -rf_LookupRUStatus(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable, RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, RF_PSSFlags_t flags, int *created); -void -rf_PSStatusDelete(RF_Raid_t * raidPtr, RF_PSStatusHeader_t * pssTable, - RF_ReconParityStripeStatus_t * pssPtr); -void -rf_RemoveFromActiveReconTable(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru); -RF_ReconParityStripeStatus_t *rf_AllocPSStatus(RF_Raid_t * raidPtr); -void rf_FreePSStatus(RF_Raid_t * raidPtr, RF_ReconParityStripeStatus_t * p); -void rf_PrintPSStatusTable(RF_Raid_t * raidPtr, RF_RowCol_t row); - -#endif /* !_RF__RF_PSSTATUS_H_ */ diff --git a/sys/dev/raidframe/rf_raid.h b/sys/dev/raidframe/rf_raid.h deleted file mode 100644 index e91a2ae..0000000 --- a/sys/dev/raidframe/rf_raid.h +++ /dev/null @@ -1,299 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/********************************************** - * rf_raid.h -- main header file for RAID driver - **********************************************/ - - -#ifndef _RF__RF_RAID_H_ -#define _RF__RF_RAID_H_ - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -#include <dev/raidframe/rf_bsd.h> - -#include <sys/disklabel.h> -#include <sys/types.h> - -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_disks.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_reconstruct.h> -#include <dev/raidframe/rf_acctrace.h> - -#if RF_INCLUDE_PARITYLOGGING > 0 -#include <dev/raidframe/rf_paritylog.h> -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -#define RF_MAX_DISKS 128 /* max disks per array */ -#if defined(__NetBSD__) -#define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev)) -#endif - -#define RF_COMPONENT_LABEL_VERSION_1 1 -#define RF_COMPONENT_LABEL_VERSION 2 -#define RF_RAID_DIRTY 0 -#define RF_RAID_CLEAN 1 - - -/* - * Each row in the array is a distinct parity group, so - * each has it's own status, which is one of the following. - */ -typedef enum RF_RowStatus_e { - rf_rs_optimal, - rf_rs_degraded, - rf_rs_reconstructing, - rf_rs_reconfigured -} RF_RowStatus_t; - -struct RF_CumulativeStats_s { - struct timeval start; /* the time when the stats were last started */ - struct timeval stop; /* the time when the stats were last stopped */ - long sum_io_us; /* sum of all user response times (us) */ - long num_ios; /* total number of I/Os serviced */ - long num_sect_moved; /* total number of sectors read or written */ -}; - -struct RF_ThroughputStats_s { - RF_DECLARE_MUTEX(mutex) /* a mutex used to lock the configuration - * stuff */ - struct timeval start; /* timer started when numOutstandingRequests - * moves from 0 to 1 */ - struct timeval stop; /* timer stopped when numOutstandingRequests - * moves from 1 to 0 */ - RF_uint64 sum_io_us; /* total time timer is enabled */ - RF_uint64 num_ios; /* total number of ios processed by RAIDframe */ - long num_out_ios; /* number of outstanding ios */ -}; - -struct RF_Raid_s { - /* This portion never changes, and can be accessed without locking */ - /* an exception is Disks[][].status, which requires locking when it is - * changed. XXX this is no longer true. numSpare and friends can - * change now. - */ - u_int numRow; /* number of rows of disks, typically == # of - * ranks */ - u_int numCol; /* number of columns of disks, typically == # - * of disks/rank */ - u_int numSpare; /* number of spare disks */ - int maxQueueDepth; /* max disk queue depth */ - RF_SectorCount_t totalSectors; /* total number of sectors in the - * array */ - RF_SectorCount_t sectorsPerDisk; /* number of sectors on each - * disk */ - u_int logBytesPerSector; /* base-2 log of the number of bytes - * in a sector */ - u_int bytesPerSector; /* bytes in a sector */ - RF_int32 sectorMask; /* mask of bytes-per-sector */ - - RF_RaidLayout_t Layout; /* all information related to layout */ - RF_RaidDisk_t **Disks; /* all information related to physical disks */ - RF_DiskQueue_t **Queues;/* all information related to disk queues */ - RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the - component queues. */ - /* NOTE: This is an anchor point via which the queues can be - * accessed, but the enqueue/dequeue routines in diskqueue.c use a - * local copy of this pointer for the actual accesses. */ - /* The remainder of the structure can change, and therefore requires - * locking on reads and updates */ - RF_DECLARE_MUTEX(mutex) /* mutex used to serialize access to - * the fields below */ - RF_RowStatus_t *status; /* the status of each row in the array */ - int valid; /* indicates successful configuration */ - RF_LockTableEntry_t *lockTable; /* stripe-lock table */ - RF_LockTableEntry_t *quiesceLock; /* quiesnce table */ - int numFailures; /* total number of failures in the array */ - int numNewFailures; /* number of *new* failures (that havn't - caused a mod_counter update */ - - int parity_good; /* !0 if parity is known to be correct */ - int serial_number; /* a "serial number" for this set */ - int mod_counter; /* modification counter for component labels */ - int clean; /* the clean bit for this array. */ - - int openings; /* Number of IO's which can be scheduled - simultaneously (high-level - not a - per-component limit)*/ - - int maxOutstanding; /* maxOutstanding requests (per-component) */ - int autoconfigure; /* automatically configure this RAID set. - 0 == no, 1 == yes */ - int root_partition; /* Use this set as / - 0 == no, 1 == yes*/ - int last_unit; /* last unit number (e.g. 0 for /dev/raid0) - of this component. Used for autoconfigure - only. */ - int config_order; /* 0 .. n. The order in which the component - should be auto-configured. E.g. 0 is will - done first, (and would become raid0). - This may be in conflict with last_unit!!?! */ - /* Not currently used. */ - - /* - * Cleanup stuff - */ - RF_ShutdownList_t *shutdownList; /* shutdown activities */ - RF_AllocListElem_t *cleanupList; /* memory to be freed at - * shutdown time */ - - /* - * Recon stuff - */ - RF_HeadSepLimit_t headSepLimit; - int numFloatingReconBufs; - int reconInProgress; - RF_DECLARE_COND(waitForReconCond) - RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */ - RF_ReconCtrl_t **reconControl; /* reconstruction control structure - * pointers for each row in the array */ - - /* - * Array-quiescence stuff - */ - RF_DECLARE_MUTEX(access_suspend_mutex) - RF_DECLARE_COND(quiescent_cond) - RF_IoCount_t accesses_suspended; - RF_IoCount_t accs_in_flight; - int access_suspend_release; - int waiting_for_quiescence; - RF_CallbackDesc_t *quiesce_wait_list; - - /* - * Statistics - */ -#if !defined(_KERNEL) && !defined(SIMULATE) - RF_ThroughputStats_t throughputstats; -#endif /* !KERNEL && !SIMULATE */ - RF_CumulativeStats_t userstats; - int parity_rewrite_stripes_done; - int recon_stripes_done; - int copyback_stripes_done; - - int recon_in_progress; - int parity_rewrite_in_progress; - int copyback_in_progress; - - /* - * Engine thread control - */ - RF_DECLARE_MUTEX(node_queue_mutex) - RF_DECLARE_COND(node_queue_cond) - RF_DagNode_t *node_queue; - RF_Thread_t parity_rewrite_thread; - RF_Thread_t copyback_thread; - RF_Thread_t engine_thread; - RF_Thread_t recon_thread; - RF_ThreadGroup_t engine_tg; - int shutdown_engine; - int dags_in_flight; /* debug */ - - /* - * PSS (Parity Stripe Status) stuff - */ - RF_FreeList_t *pss_freelist; - long pssTableSize; - - /* - * Reconstruction stuff - */ - int procsInBufWait; - int numFullReconBuffers; - RF_AccTraceEntry_t *recon_tracerecs; - unsigned long accumXorTimeUs; - RF_ReconDoneProc_t *recon_done_procs; - RF_DECLARE_MUTEX(recon_done_proc_mutex) - /* - * nAccOutstanding, waitShutdown protected by desc freelist lock - * (This may seem strange, since that's a central serialization point - * for a per-array piece of data, but otherwise, it'd be an extra - * per-array lock, and that'd only be less efficient...) - */ - RF_DECLARE_COND(outstandingCond) - int waitShutdown; - int nAccOutstanding; - - RF_DiskId_t **diskids; - RF_DiskId_t *sparediskids; - - int raidid; - RF_AccTotals_t acc_totals; - int keep_acc_totals; - - struct raidcinfo **raid_cinfo; /* array of component info */ - - int terminate_disk_queues; - - /* - * XXX - * - * config-specific information should be moved - * somewhere else, or at least hung off this - * in some generic way - */ - - /* used by rf_compute_workload_shift */ - RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; - - /* used by declustering */ - int noRotate; - -#if RF_INCLUDE_PARITYLOGGING > 0 - /* used by parity logging */ - RF_SectorCount_t regionLogCapacity; - RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */ - RF_RegionInfo_t *regionInfo; /* array of region state */ - int numParityLogs; - int numSectorsPerLog; - int regionParityRange; - int logsInUse; /* debugging */ - RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity - * logging disk work */ - RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding - * region log */ - RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding - * parity */ - caddr_t parityLogBufferHeap; /* pool of unused parity logs */ - RF_Thread_t pLogDiskThreadHandle; - -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - /* Point back to the softc for this device. This is needed to rid - * ourselves of the ugly static device arrays. - * XXX Will this affect compatibility with NetBSD? - */ - void *sc; -}; -#endif /* !_RF__RF_RAID_H_ */ diff --git a/sys/dev/raidframe/rf_raid0.c b/sys/dev/raidframe/rf_raid0.c deleted file mode 100644 index 5eefabb..0000000 --- a/sys/dev/raidframe/rf_raid0.c +++ /dev/null @@ -1,163 +0,0 @@ -/* $NetBSD: rf_raid0.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************** - * - * rf_raid0.c -- implements RAID Level 0 - * - ***************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid0.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_parityscan.h> - -typedef struct RF_Raid0ConfigInfo_s { - RF_RowCol_t *stripeIdentifier; -} RF_Raid0ConfigInfo_t; - -int -rf_ConfigureRAID0( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid0ConfigInfo_t *info; - RF_RowCol_t i; - - /* create a RAID level 0 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid0ConfigInfo_t), (RF_Raid0ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < raidPtr->numCol; i++) - info->stripeIdentifier[i] = i; - - RF_ASSERT(raidPtr->numRow == 1); - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->dataSectorsPerStripe = raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol; - layoutPtr->numParityCol = 0; - return (0); -} - -void -rf_MapSectorRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = SUID % raidPtr->numCol; - *diskSector = (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - *row = *col = 0; - *diskSector = 0; -} - -void -rf_IdentifyStripeRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_Raid0ConfigInfo_t *info; - - info = raidPtr->Layout.layoutSpecificInfo; - *diskids = info->stripeIdentifier; - *outRow = 0; -} - -void -rf_MapSIDToPSIDRAID0( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} - -void -rf_RAID0DagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - *createFunc = ((type == RF_IO_TYPE_READ) ? - (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRAID0WriteDAG); -} - -int -rf_VerifyParityRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) -{ - /* - * No parity is always okay. - */ - return (RF_PARITY_OKAY); -} diff --git a/sys/dev/raidframe/rf_raid0.h b/sys/dev/raidframe/rf_raid0.h deleted file mode 100644 index 36aae81..0000000 --- a/sys/dev/raidframe/rf_raid0.h +++ /dev/null @@ -1,58 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid0.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_raid0.h - header file for RAID Level 0 */ - -#ifndef _RF__RF_RAID0_H_ -#define _RF__RF_RAID0_H_ - -int -rf_ConfigureRAID0(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -void -rf_MapSectorRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID0(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID0DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); - -#endif /* !_RF__RF_RAID0_H_ */ diff --git a/sys/dev/raidframe/rf_raid1.c b/sys/dev/raidframe/rf_raid1.c deleted file mode 100644 index 845e316..0000000 --- a/sys/dev/raidframe/rf_raid1.c +++ /dev/null @@ -1,691 +0,0 @@ -/* $NetBSD: rf_raid1.c,v 1.5 2000/01/08 22:57:30 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * rf_raid1.c -- implements RAID Level 1 - * - *****************************************************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid1.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_parityscan.h> -#include <dev/raidframe/rf_mcpair.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_kintf.h> - -typedef struct RF_Raid1ConfigInfo_s { - RF_RowCol_t **stripeIdentifier; -} RF_Raid1ConfigInfo_t; -/* start of day code specific to RAID level 1 */ -int -rf_ConfigureRAID1( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid1ConfigInfo_t *info; - RF_RowCol_t i; - - /* create a RAID level 1 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* ... and fill it in. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < (raidPtr->numCol / 2); i++) { - info->stripeIdentifier[i][0] = (2 * i); - info->stripeIdentifier[i][1] = (2 * i) + 1; - } - - RF_ASSERT(raidPtr->numRow == 1); - - /* this implementation of RAID level 1 uses one row of numCol disks - * and allows multiple (numCol / 2) stripes per row. A stripe - * consists of a single data unit and a single parity (mirror) unit. - * stripe id = raidAddr / stripeUnitSize */ - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2); - layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->numParityCol = 1; - return (0); -} - - -/* returns the physical disk location of the primary copy in the mirror pair */ -void -rf_MapSectorRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); - - *row = 0; - *col = 2 * mirrorPair; - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* Map Parity - * - * returns the physical disk location of the secondary copy in the mirror - * pair - */ -void -rf_MapParityRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); - - *row = 0; - *col = (2 * mirrorPair) + 1; - - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - - -/* IdentifyStripeRAID1 - * - * returns a list of disks for a given redundancy group - */ -void -rf_IdentifyStripeRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; - RF_ASSERT(stripeID >= 0); - RF_ASSERT(addr >= 0); - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)]; - RF_ASSERT(*diskids); -} - - -/* MapSIDToPSIDRAID1 - * - * maps a logical stripe to a stripe in the redundant array - */ -void -rf_MapSIDToPSIDRAID1( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} - - - -/****************************************************************************** - * select a graph to perform a single-stripe access - * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - name of function to use to create the graph - *****************************************************************************/ - -void -rf_RAID1DagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RowCol_t frow, fcol, or, oc; - RF_PhysDiskAddr_t *failedPDA; - int prior_recon; - RF_RowStatus_t rstat; - RF_SectorNum_t oo; - - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - if (asmap->numDataFailed + asmap->numParityFailed) { - /* - * We've got a fault. Re-map to spare space, iff applicable. - * Shouldn't the arch-independent code do this for us? - * Anyway, it turns out if we don't do this here, then when - * we're reconstructing, writes go only to the surviving - * original disk, and aren't reflected on the reconstructed - * spare. Oops. --jimz - */ - failedPDA = asmap->failedPDAs[0]; - frow = failedPDA->row; - fcol = failedPDA->col; - rstat = raidPtr->status[frow]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - or = frow; - oc = fcol; - oo = failedPDA->startSector; - /* - * If we did distributed sparing, we'd monkey with that here. - * But we don't, so we'll - */ - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - /* - * Redirect other components, iff necessary. This looks - * pretty suspicious to me, but it's what the raid5 - * DAG select does. - */ - if (asmap->parityInfo->next) { - if (failedPDA == asmap->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else { - if (failedPDA == asmap->parityInfo->next) { - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; - } - } - } - if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, - (long) oo, failedPDA->row, - failedPDA->col, - (long) failedPDA->startSector); - } - asmap->numDataFailed = asmap->numParityFailed = 0; - } - } - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; - } else { - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; - } -} - -int -rf_VerifyParityRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) -{ - int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs; - RF_DagNode_t *blockNode, *unblockNode, *wrBlock; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; - RF_AccessStripeMapHeader_t *asm_h; - RF_AllocListElem_t *allocList; - RF_AccTraceEntry_t tracerec; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_AccessStripeMap_t *aasm; - RF_SectorCount_t nsector; - RF_RaidAddr_t startAddr; - char *buf, *buf1, *buf2; - RF_PhysDiskAddr_t *pda; - RF_StripeNum_t psID; - RF_MCPair_t *mcpair; - - layoutPtr = &raidPtr->Layout; - startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - nsector = parityPDA->numSector; - nbytes = rf_RaidAddressToByte(raidPtr, nsector); - psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - - asm_h = NULL; - rd_dag_h = wr_dag_h = NULL; - mcpair = NULL; - - ret = RF_PARITY_COULD_NOT_VERIFY; - - rf_MakeAllocList(allocList); - if (allocList == NULL) - return (RF_PARITY_COULD_NOT_VERIFY); - mcpair = rf_AllocMCPair(); - if (mcpair == NULL) - goto done; - RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol); - stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol); - RF_MallocAndAdd(buf, bcount, (char *), allocList); - if (buf == NULL) - goto done; - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n", - raidPtr->raidid, (long) buf, bcount, (long) buf, - (long) buf + bcount); - } - /* - * Generate a DAG which will read the entire stripe- then we can - * just compare data chunks versus "parity" chunks. - */ - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf, - rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags, - RF_IO_NORMAL_PRIORITY); - if (rd_dag_h == NULL) - goto done; - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* - * Map the access to physical disk addresses (PDAs)- this will - * get us both a list of data addresses, and "parity" addresses - * (which are really mirror copies). - */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, - buf, RF_DONT_REMAP); - aasm = asm_h->stripeMap; - - buf1 = buf; - /* - * Loop through the data blocks, setting up read nodes for each. - */ - for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { - RF_ASSERT(pda); - - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { - /* cannot verify parity with dead disk */ - goto done; - } - pda->bufPtr = buf1; - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[1].p = buf1; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - buf1 += nbytes; - } - RF_ASSERT(pda == NULL); - /* - * keep i, buf1 running - * - * Loop through parity blocks, setting up read nodes for each. - */ - for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { - /* cannot verify parity with dead disk */ - goto done; - } - pda->bufPtr = buf1; - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[1].p = buf1; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - buf1 += nbytes; - } - RF_ASSERT(pda == NULL); - - bzero((char *) &tracerec, sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug > 1) { - printf("raid%d: RAID1 parity verify read dag:\n", - raidPtr->raidid); - rf_PrintDAGList(rd_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (mcpair->flag == 0) { - RF_WAIT_MCPAIR(mcpair); - } - RF_UNLOCK_MUTEX(mcpair->mutex); - - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n"); - ret = RF_PARITY_COULD_NOT_VERIFY; - goto done; - } - /* - * buf1 is the beginning of the data blocks chunk - * buf2 is the beginning of the parity blocks chunk - */ - buf1 = buf; - buf2 = buf + (nbytes * layoutPtr->numDataCol); - ret = RF_PARITY_OKAY; - /* - * bbufs is "bad bufs"- an array whose entries are the data - * column numbers where we had miscompares. (That is, column 0 - * and column 1 of the array are mirror copies, and are considered - * "data column 0" for this purpose). - */ - RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *), - allocList); - nbad = 0; - /* - * Check data vs "parity" (mirror copy). - */ - for (i = 0; i < layoutPtr->numDataCol; i++) { - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n", - raidPtr->raidid, nbytes, i, (long) buf1, - (long) buf2, (long) buf); - } - ret = bcmp(buf1, buf2, nbytes); - if (ret) { - if (rf_verifyParityDebug > 1) { - for (j = 0; j < nbytes; j++) { - if (buf1[j] != buf2[j]) - break; - } - printf("psid=%ld j=%d\n", (long) psID, j); - printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff, - buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff); - printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff, - buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff); - } - if (rf_verifyParityDebug) { - printf("raid%d: RAID1: found bad parity, i=%d\n", raidPtr->raidid, i); - } - /* - * Parity is bad. Keep track of which columns were bad. - */ - if (bbufs) - bbufs[nbad] = i; - nbad++; - ret = RF_PARITY_BAD; - } - buf1 += nbytes; - buf2 += nbytes; - } - - if ((ret != RF_PARITY_OKAY) && correct_it) { - ret = RF_PARITY_COULD_NOT_CORRECT; - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify: parity not correct\n", raidPtr->raidid); - } - if (bbufs == NULL) - goto done; - /* - * Make a DAG with one write node for each bad unit. We'll simply - * write the contents of the data unit onto the parity unit for - * correction. (It's possible that the mirror copy was the correct - * copy, and that we're spooging good data by writing bad over it, - * but there's no way we can know that. - */ - wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags, - RF_IO_NORMAL_PRIORITY); - if (wr_dag_h == NULL) - goto done; - wrBlock = wr_dag_h->succedents[0]; - /* - * Fill in a write node for each bad compare. - */ - for (i = 0; i < nbad; i++) { - j = i + layoutPtr->numDataCol; - pda = blockNode->succedents[j]->params[0].p; - pda->bufPtr = blockNode->succedents[i]->params[1].p; - wrBlock->succedents[i]->params[0].p = pda; - wrBlock->succedents[i]->params[1].p = pda->bufPtr; - wrBlock->succedents[i]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - bzero((char *) &tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug > 1) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - /* fire off the write DAG */ - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) { - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - } - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n"); - goto done; - } - ret = RF_PARITY_CORRECTED; - } -done: - /* - * All done. We might've gotten here without doing part of the function, - * so cleanup what we have to and return our running status. - */ - if (asm_h) - rf_FreeAccessStripeMap(asm_h); - if (rd_dag_h) - rf_FreeDAG(rd_dag_h); - if (wr_dag_h) - rf_FreeDAG(wr_dag_h); - if (mcpair) - rf_FreeMCPair(mcpair); - rf_FreeAllocList(allocList); - if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify, returning %d\n", - raidPtr->raidid, ret); - } - return (ret); -} - -int -rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ -{ - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconCtrl_t *reconCtrlPtr; - RF_RaidLayout_t *layoutPtr; - int retcode, created; - RF_CallbackDesc_t *cb, *p; - RF_ReconBuffer_t *t; - RF_Raid_t *raidPtr; - caddr_t ta; - - retcode = 0; - created = 0; - - raidPtr = rbuf->raidPtr; - layoutPtr = &raidPtr->Layout; - reconCtrlPtr = raidPtr->reconControl[rbuf->row]; - - RF_ASSERT(rbuf); - RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); - - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n", - raidPtr->raidid, rbuf->row, rbuf->col, - (long) rbuf->parityStripeID, rbuf->which_ru, - (long) rbuf->failedDiskSectorOffset); - } - if (rf_reconDebug) { - printf("RAID1 reconbuffer submit psid %ld buf %lx\n", - (long) rbuf->parityStripeID, (long) rbuf->buffer); - printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n", - (long) rbuf->parityStripeID, - rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3], - rbuf->buffer[4]); - } - RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, - rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten - * an rbuf for it */ - - /* - * Since this is simple mirroring, the first submission for a stripe is also - * treated as the last. - */ - - t = NULL; - if (keep_it) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: keeping rbuf\n", - raidPtr->raidid); - } - t = rbuf; - } else { - if (use_committed) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: using committed rbuf\n", raidPtr->raidid); - } - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - t->next = NULL; - } else - if (reconCtrlPtr->floatingRbufs) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: using floating rbuf\n", raidPtr->raidid); - } - t = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = t->next; - t->next = NULL; - } - } - if (t == NULL) { - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: waiting for rbuf\n", raidPtr->raidid); - } - RF_ASSERT((keep_it == 0) && (use_committed == 0)); - raidPtr->procsInBufWait++; - if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1)) - && (raidPtr->numFullReconBuffers == 0)) { - /* ruh-ro */ - RF_ERRORMSG("Buffer wait deadlock\n"); - rf_PrintPSStatusTable(raidPtr, rbuf->row); - RF_PANIC(); - } - pssPtr->flags |= RF_PSS_BUFFERWAIT; - cb = rf_AllocCallbackDesc(); - cb->row = rbuf->row; - cb->col = rbuf->col; - cb->callbackArg.v = rbuf->parityStripeID; - cb->callbackArg2.v = rbuf->which_ru; - cb->next = NULL; - if (reconCtrlPtr->bufferWaitList == NULL) { - /* we are the wait list- lucky us */ - reconCtrlPtr->bufferWaitList = cb; - } else { - /* append to wait list */ - for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); - p->next = cb; - } - retcode = 1; - goto out; - } - if (t != rbuf) { - t->row = rbuf->row; - t->col = reconCtrlPtr->fcol; - t->parityStripeID = rbuf->parityStripeID; - t->which_ru = rbuf->which_ru; - t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; - t->spRow = rbuf->spRow; - t->spCol = rbuf->spCol; - t->spOffset = rbuf->spOffset; - /* Swap buffers. DANCE! */ - ta = t->buffer; - t->buffer = rbuf->buffer; - rbuf->buffer = ta; - } - /* - * Use the rbuf we've been given as the target. - */ - RF_ASSERT(pssPtr->rbuf == NULL); - pssPtr->rbuf = t; - - t->count = 1; - /* - * Below, we use 1 for numDataCol (which is equal to the count in the - * previous line), so we'll always be done. - */ - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1); - -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: returning %d\n", - raidPtr->raidid, retcode); - } - return (retcode); -} diff --git a/sys/dev/raidframe/rf_raid1.h b/sys/dev/raidframe/rf_raid1.h deleted file mode 100644 index 484cbcf..0000000 --- a/sys/dev/raidframe/rf_raid1.h +++ /dev/null @@ -1,63 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid1.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: William V. Courtright II - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* header file for RAID Level 1 */ - -#ifndef _RF__RF_RAID1_H_ -#define _RF__RF_RAID1_H_ - -#include <dev/raidframe/rf_types.h> - -int -rf_ConfigureRAID1(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -void -rf_MapSectorRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID1DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int -rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); - -#endif /* !_RF__RF_RAID1_H_ */ diff --git a/sys/dev/raidframe/rf_raid4.c b/sys/dev/raidframe/rf_raid4.c deleted file mode 100644 index d080319..0000000 --- a/sys/dev/raidframe/rf_raid4.c +++ /dev/null @@ -1,159 +0,0 @@ -/* $NetBSD: rf_raid4.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Rachad Youssef - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************** - * - * rf_raid4.c -- implements RAID Level 4 - * - ***************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_raid4.h> -#include <dev/raidframe/rf_general.h> - -typedef struct RF_Raid4ConfigInfo_s { - RF_RowCol_t *stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_Raid4ConfigInfo_t; - - - -int -rf_ConfigureRAID4( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid4ConfigInfo_t *info; - int i; - - /* create a RAID level 4 configuration structure ... */ - RF_MallocAndAdd(info, sizeof(RF_Raid4ConfigInfo_t), (RF_Raid4ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* ... and fill it in. */ - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - for (i = 0; i < raidPtr->numCol; i++) - info->stripeIdentifier[i] = i; - - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr) -{ - return (20); -} - -void -rf_MapSectorRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_IdentifyStripeRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_Raid4ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; - - *outRow = 0; - *diskids = info->stripeIdentifier; -} - -void -rf_MapSIDToPSIDRAID4( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} diff --git a/sys/dev/raidframe/rf_raid4.h b/sys/dev/raidframe/rf_raid4.h deleted file mode 100644 index 56df05a..0000000 --- a/sys/dev/raidframe/rf_raid4.h +++ /dev/null @@ -1,57 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid4.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Rachad Youssef - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_raid4.h header file for RAID Level 4 */ - -#ifndef _RF__RF_RAID4_H_ -#define _RF__RF_RAID4_H_ - -int -rf_ConfigureRAID4(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID4(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID4DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_RAID4_H_ */ diff --git a/sys/dev/raidframe/rf_raid5.c b/sys/dev/raidframe/rf_raid5.c deleted file mode 100644 index 794e5a3..0000000 --- a/sys/dev/raidframe/rf_raid5.c +++ /dev/null @@ -1,322 +0,0 @@ -/* $NetBSD: rf_raid5.c,v 1.4 2000/01/08 22:57:30 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * - * rf_raid5.c -- implements RAID Level 5 - * - *****************************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid5.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagffrd.h> -#include <dev/raidframe/rf_dagffwr.h> -#include <dev/raidframe/rf_dagdegrd.h> -#include <dev/raidframe/rf_dagdegwr.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_utils.h> - -typedef struct RF_Raid5ConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ -} RF_Raid5ConfigInfo_t; - -int -rf_ConfigureRAID5( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid5ConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - /* create a RAID level 5 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - startdisk = 0; - for (i = 0; i < raidPtr->numCol; i++) { - for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((--startdisk) < 0) - startdisk = raidPtr->numCol - 1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -int -rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr) -{ - return (20); -} - -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr) -{ - return (10); -} -#if !defined(__NetBSD__) && !defined(__FreeBSD__) && !defined(_KERNEL) -/* not currently used */ -int -rf_ShutdownRAID5(RF_Raid_t * raidPtr) -{ - return (0); -} -#endif - -void -rf_MapSectorRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = (SUID % raidPtr->numCol); - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->Layout.numDataCol - (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_IdentifyStripeRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; -} - -void -rf_MapSIDToPSIDRAID5( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -/* select an algorithm for performing an access. Returns two pointers, - * one to a function that will return information about the DAG, and - * another to a function that will create the dag. - */ -void -rf_RaidFiveDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -{ - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA = NULL; - RF_RowCol_t frow, fcol; - RF_RowStatus_t rstat; - int prior_recon; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } else - if (asmap->numDataFailed + asmap->numParityFailed == 1) { - - /* if under recon & already reconstructed, redirect - * the access to the spare drive and eliminate the - * failure indication */ - failedPDA = asmap->failedPDAs[0]; - frow = failedPDA->row; - fcol = failedPDA->col; - rstat = raidPtr->status[failedPDA->row]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; - RF_SectorNum_t oo = failedPDA->startSector; - - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist - * spare space */ - - if (failedPDA == asmap->parityInfo) { - - /* parity has failed */ - (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmap->parityInfo->next) { /* redir 2nd component, - * if any */ - RF_PhysDiskAddr_t *p = asmap->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; - p->row = failedPDA->row; - p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: - * startSector is not - * really a RAID address */ - } - } else - if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) { - RF_ASSERT(0); /* should not ever - * happen */ - } else { - - /* data has failed */ - (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - } - - } else { /* redirect to dedicated spare - * space */ - - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - - /* the parity may have two distinct - * components, both of which may need - * to be redirected */ - if (asmap->parityInfo->next) { - if (failedPDA == asmap->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else - if (failedPDA == asmap->parityInfo->next) { /* paranoid: should - * never occur */ - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; - } - } - } - - RF_ASSERT(failedPDA->col != -1); - - if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, - (long) oo, failedPDA->row, - failedPDA->col, - (long) failedPDA->startSector); - } - asmap->numDataFailed = asmap->numParityFailed = 0; - } - } - /* all dags begin/end with block/unblock node therefore, hdrSucc & - * termAnt counts should always be 1 also, these counts should not be - * visible outside dag creation routines - manipulating the counts - * here should be removed */ - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; - } else { - - - /* if mirroring, always use large writes. If the access - * requires two distinct parity updates, always do a small - * write. If the stripe contains a failure but the access - * does not, do a small write. The first conditional - * (numStripeUnitsAccessed <= numDataCol/2) uses a - * less-than-or-equal rather than just a less-than because - * when G is 3 or 4, numDataCol/2 is 1, and I want - * single-stripe-unit updates to use just one disk. */ - if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - *createFunc = (RF_VoidFuncPtr) rf_CreateSmallWriteDAG; - } else - *createFunc = (RF_VoidFuncPtr) rf_CreateLargeWriteDAG; - } else { - if (asmap->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; - else - if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; - else - *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; - } - } -} diff --git a/sys/dev/raidframe/rf_raid5.h b/sys/dev/raidframe/rf_raid5.h deleted file mode 100644 index 17549fe..0000000 --- a/sys/dev/raidframe/rf_raid5.h +++ /dev/null @@ -1,57 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid5.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_raid5.h - header file for RAID Level 5 */ - -#ifndef _RF__RF_RAID5_H_ -#define _RF__RF_RAID5_H_ - -int -rf_ConfigureRAID5(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID5(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RaidFiveDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); - -#endif /* !_RF__RF_RAID5_H_ */ diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.c b/sys/dev/raidframe/rf_raid5_rotatedspare.c deleted file mode 100644 index f167a5f..0000000 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.c +++ /dev/null @@ -1,177 +0,0 @@ -/* $NetBSD: rf_raid5_rotatedspare.c,v 1.5 2001/01/26 05:16:58 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************** - * - * rf_raid5_rotated_spare.c -- implements RAID Level 5 with rotated sparing - * - **************************************************************************/ - -#include <dev/raidframe/rf_archs.h> - -#if RF_INCLUDE_RAID5_RS > 0 - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_raid5.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_dagfuncs.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_raid5_rotatedspare.h> - -typedef struct RF_Raid5RSConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_Raid5RSConfigInfo_t; - -int -rf_ConfigureRAID5_RS( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid5RSConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - /* create a RAID level 5 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5RSConfigInfo_t), (RF_Raid5RSConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return (ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - RF_ASSERT(raidPtr->numCol >= 3); - - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return (ENOMEM); - startdisk = 0; - for (i = 0; i < raidPtr->numCol; i++) { - for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((--startdisk) < 0) - startdisk = raidPtr->numCol - 1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 2; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return (0); -} - -RF_ReconUnitCount_t -rf_GetNumSpareRUsRAID5_RS(raidPtr) - RF_Raid_t *raidPtr; -{ - return (raidPtr->Layout.stripeUnitsPerDisk / raidPtr->numCol); -} - -void -rf_MapSectorRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - if (remap) { - *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; /* spare unit is rotated - * with parity; line - * above maps to parity */ - } else { - *col = (SUID + (SUID / raidPtr->Layout.numDataCol)) % raidPtr->numCol; - } - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); -} - -void -rf_MapParityRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) -{ - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - if (remap) - *col = (*col + 1) % raidPtr->numCol; -} - -void -rf_IdentifyStripeRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) -{ - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5RSConfigInfo_t *info = (RF_Raid5RSConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - *outRow = 0; - *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; - -} - -void -rf_MapSIDToPSIDRAID5_RS( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) -{ - *which_ru = 0; - *psID = stripeID; -} -#endif /* RF_INCLUDE_RAID5_RS > 0 */ diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.h b/sys/dev/raidframe/rf_raid5_rotatedspare.h deleted file mode 100644 index 779150f..0000000 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.h +++ /dev/null @@ -1,53 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raid5_rotatedspare.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_raid5_rotatedspare.h - header file for RAID Level 5 with rotated sparing */ - -#ifndef _RF__RF_RAID5_ROTATEDSPARE_H_ -#define _RF__RF_RAID5_ROTATEDSPARE_H_ - -int -rf_ConfigureRAID5_RS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); - -#endif /* !_RF__RF_RAID5_ROTATEDSPARE_H_ */ diff --git a/sys/dev/raidframe/rf_raidframe.h b/sys/dev/raidframe/rf_raidframe.h deleted file mode 100644 index fd711bd..0000000 --- a/sys/dev/raidframe/rf_raidframe.h +++ /dev/null @@ -1,162 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_raidframe.h,v 1.11 2000/05/28 00:48:31 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************** - * - * rf_raidframe.h - * - * main header file for using raidframe in the kernel. - * - *****************************************************/ - - -#ifndef _RF__RF_RAIDFRAME_H_ -#define _RF__RF_RAIDFRAME_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_configure.h> -#include <dev/raidframe/rf_disks.h> -#include <dev/raidframe/rf_raid.h> - -typedef RF_uint32 RF_ReconReqFlags_t; - -struct rf_recon_req { /* used to tell the kernel to fail a disk */ - RF_RowCol_t row, col; - RF_ReconReqFlags_t flags; - void *raidPtr; /* used internally; need not be set at ioctl - * time */ - struct rf_recon_req *next; /* used internally; need not be set at - * ioctl time */ -}; - -struct RF_SparetWait_s { - int C, G, fcol; /* C = # disks in row, G = # units in stripe, - * fcol = which disk has failed */ - - RF_StripeCount_t SUsPerPU; /* this stuff is the info required to - * create a spare table */ - int TablesPerSpareRegion; - int BlocksPerTable; - RF_StripeCount_t TableDepthInPUs; - RF_StripeCount_t SpareSpaceDepthPerRegionInSUs; - - RF_SparetWait_t *next; /* used internally; need not be set at ioctl - * time */ -}; - -typedef struct RF_DeviceConfig_s { - u_int rows; - u_int cols; - u_int maxqdepth; - int ndevs; - RF_RaidDisk_t devs[RF_MAX_DISKS]; - int nspares; - RF_RaidDisk_t spares[RF_MAX_DISKS]; -} RF_DeviceConfig_t; - -typedef struct RF_ProgressInfo_s { - RF_uint64 remaining; - RF_uint64 completed; - RF_uint64 total; -} RF_ProgressInfo_t; - -/* flags that can be put in the rf_recon_req structure */ -#define RF_FDFLAGS_NONE 0x0 /* just fail the disk */ -#define RF_FDFLAGS_RECON 0x1 /* fail and initiate recon */ - -#define RAIDFRAME_CONFIGURE _IOW ('r', 1, void *) /* config an array */ -#if defined(__NetBSD__) -#define RAIDFRAME_SHUTDOWN _IO ('r', 2) /* shutdown the array */ -#elif defined(__FreeBSD__) -#define RAIDFRAME_SHUTDOWN _IOW ('r', 2, int) /* shutdown the array */ -#endif -#define RAIDFRAME_TUR _IOW ('r', 3, dev_t) /* debug only: test - * ready */ -#define RAIDFRAME_TEST_ACC _IOWR('r', 4, struct rf_test_acc) - /* run a test access */ -#define RAIDFRAME_FAIL_DISK _IOW ('r', 5, struct rf_recon_req) - /* fail a disk & - * optionally start - * recon */ -#define RAIDFRAME_CHECK_RECON_STATUS _IOR('r', 6, int) /* get reconstruction % - * complete on indicated - * row */ -#define RAIDFRAME_REWRITEPARITY _IO ('r', 7) /* rewrite (initialize) - * all parity */ -#define RAIDFRAME_COPYBACK _IO ('r', 8) /* copy reconstructed - * data back to replaced - * disk */ -#define RAIDFRAME_SPARET_WAIT _IOR ('r', 9, RF_SparetWait_t) - /* does not return until - * kernel needs a spare - * table */ -#define RAIDFRAME_SEND_SPARET _IOW ('r', 10, void *) /* used to send a spare - * table down into the - * kernel */ -#define RAIDFRAME_ABORT_SPARET_WAIT _IO ('r', 11) /* used to wake up the - * sparemap daemon & - * tell it to exit */ -#define RAIDFRAME_START_ATRACE _IO ('r', 12) /* start tracing - * accesses */ -#define RAIDFRAME_STOP_ATRACE _IO ('r', 13) /* stop tracing - * accesses */ -#define RAIDFRAME_GET_SIZE _IOR ('r', 14, int) /* get size (# sectors) - * in raid device */ -#define RAIDFRAME_GET_INFO _IOWR ('r', 15, RF_DeviceConfig_t *) - /* get configuration */ -#define RAIDFRAME_RESET_ACCTOTALS _IO ('r', 16) /* reset AccTotals for - * device */ -#define RAIDFRAME_GET_ACCTOTALS _IOR ('r', 17, RF_AccTotals_t) - /* retrieve AccTotals - * for device */ -#define RAIDFRAME_KEEP_ACCTOTALS _IOW ('r', 18, int) /* turn AccTotals on or - * off for device */ -#define RAIDFRAME_GET_COMPONENT_LABEL _IOWR ('r', 19, RF_ComponentLabel_t) -#define RAIDFRAME_SET_COMPONENT_LABEL _IOW ('r', 20, RF_ComponentLabel_t) - -#define RAIDFRAME_INIT_LABELS _IOW ('r', 21, RF_ComponentLabel_t) -#define RAIDFRAME_ADD_HOT_SPARE _IOW ('r', 22, RF_SingleComponent_t) -#define RAIDFRAME_REMOVE_HOT_SPARE _IOW ('r', 23, RF_SingleComponent_t) -#define RAIDFRAME_REBUILD_IN_PLACE _IOW ('r', 24, RF_SingleComponent_t) -#define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int) -#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOR ('r', 26, int) -#define RAIDFRAME_CHECK_COPYBACK_STATUS _IOR ('r', 27, int) -#define RAIDFRAME_SET_AUTOCONFIG _IOWR ('r', 28, int) -#define RAIDFRAME_SET_ROOT _IOWR ('r', 29, int) -#define RAIDFRAME_DELETE_COMPONENT _IOW ('r', 30, RF_SingleComponent_t) -#define RAIDFRAME_INCORPORATE_HOT_SPARE _IOW ('r', 31, RF_SingleComponent_t) - -/* 'Extended' status versions */ -#define RAIDFRAME_CHECK_RECON_STATUS_EXT _IOR('r', 32, RF_ProgressInfo_t) -#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT _IOR ('r', 33, \ - RF_ProgressInfo_t) -#define RAIDFRAME_CHECK_COPYBACK_STATUS_EXT _IOR ('r', 34, RF_ProgressInfo_t) -#define RAIDFRAME_GET_UNIT _IOWR ('r', 35, int) - -#endif /* !_RF__RF_RAIDFRAME_H_ */ diff --git a/sys/dev/raidframe/rf_reconbuffer.c b/sys/dev/raidframe/rf_reconbuffer.c deleted file mode 100644 index 5831d5a..0000000 --- a/sys/dev/raidframe/rf_reconbuffer.c +++ /dev/null @@ -1,468 +0,0 @@ -/* $NetBSD: rf_reconbuffer.c,v 1.5 2001/01/27 20:10:49 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************************** - * - * rf_reconbuffer.c -- reconstruction buffer manager - * - ***************************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_reconutil.h> -#include <dev/raidframe/rf_nwayxor.h> - -#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) -#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) -#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) -#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) -#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) - -/***************************************************************************** - * - * Submit a reconstruction buffer to the manager for XOR. We can only - * submit a buffer if (1) we can xor into an existing buffer, which - * means we don't have to acquire a new one, (2) we can acquire a - * floating recon buffer, or (3) the caller has indicated that we are - * allowed to keep the submitted buffer. - * - * Returns non-zero if and only if we were not able to submit. - * In this case, we append the current disk ID to the wait list on the - * indicated RU, so that it will be re-enabled when we acquire a buffer - * for this RU. - * - ****************************************************************************/ - -/* - * nWayXorFuncs[i] is a pointer to a function that will xor "i" - * bufs into the accumulating sum. - */ -static RF_VoidFuncPtr nWayXorFuncs[] = { - NULL, - (RF_VoidFuncPtr) rf_nWayXor1, - (RF_VoidFuncPtr) rf_nWayXor2, - (RF_VoidFuncPtr) rf_nWayXor3, - (RF_VoidFuncPtr) rf_nWayXor4, - (RF_VoidFuncPtr) rf_nWayXor5, - (RF_VoidFuncPtr) rf_nWayXor6, - (RF_VoidFuncPtr) rf_nWayXor7, - (RF_VoidFuncPtr) rf_nWayXor8, - (RF_VoidFuncPtr) rf_nWayXor9 -}; - -int -rf_SubmitReconBuffer(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ -{ - RF_LayoutSW_t *lp; - int rc; - - lp = rbuf->raidPtr->Layout.map; - rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); - return (rc); -} - -int -rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ -{ - RF_Raid_t *raidPtr = rbuf->raidPtr; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row]; - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf - * pointers */ - caddr_t ta; /* temporary data buffer pointer */ - RF_CallbackDesc_t *cb, *p; - int retcode = 0, created = 0; - - RF_Etimer_t timer; - - /* makes no sense to have a submission from the failed disk */ - RF_ASSERT(rbuf); - RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); - - Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n", - rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); - - RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten - * an rbuf for it */ - - /* check to see if enough buffers have accumulated to do an XOR. If - * so, there's no need to acquire a floating rbuf. Before we can do - * any XORing, we must have acquired a destination buffer. If we - * have, then we can go ahead and do the XOR if (1) including this - * buffer, enough bufs have accumulated, or (2) this is the last - * submission for this stripe. Otherwise, we have to go acquire a - * floating rbuf. */ - - targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - if ((targetRbuf != NULL) && - ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { - pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ - Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount); - RF_ETIMER_START(timer); - rf_MultiWayReconXor(raidPtr, pssPtr); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); - if (!keep_it) { - raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - - rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); - } - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); - - /* if use_committed is on, we _must_ consume a buffer off the - * committed list. */ - if (use_committed) { - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t); - } - if (keep_it) { - RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - rf_FreeReconBuffer(rbuf); - return (retcode); - } - goto out; - } - /* set the value of "t", which we'll use as the rbuf from here on */ - if (keep_it) { - t = rbuf; - } else { - if (use_committed) { /* if a buffer has been committed to - * us, use it */ - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - t->next = NULL; - } else - if (reconCtrlPtr->floatingRbufs) { - t = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = t->next; - t->next = NULL; - } - } - - /* If we weren't able to acquire a buffer, append to the end of the - * buf list in the recon ctrl struct. */ - if (!t) { - RF_ASSERT(!keep_it && !use_committed); - Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col); - - raidPtr->procsInBufWait++; - if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { - printf("Buffer wait deadlock detected. Exiting.\n"); - rf_PrintPSStatusTable(raidPtr, rbuf->row); - RF_PANIC(); - } - pssPtr->flags |= RF_PSS_BUFFERWAIT; - cb = rf_AllocCallbackDesc(); /* append to buf wait list in - * recon ctrl structure */ - cb->row = rbuf->row; - cb->col = rbuf->col; - cb->callbackArg.v = rbuf->parityStripeID; - cb->callbackArg2.v = rbuf->which_ru; - cb->next = NULL; - if (!reconCtrlPtr->bufferWaitList) - reconCtrlPtr->bufferWaitList = cb; - else { /* might want to maintain head/tail pointers - * here rather than search for end of list */ - for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); - p->next = cb; - } - retcode = 1; - goto out; - } - Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - - rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); - - /* initialize the buffer */ - if (t != rbuf) { - t->row = rbuf->row; - t->col = reconCtrlPtr->fcol; - t->parityStripeID = rbuf->parityStripeID; - t->which_ru = rbuf->which_ru; - t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; - t->spRow = rbuf->spRow; - t->spCol = rbuf->spCol; - t->spOffset = rbuf->spOffset; - - ta = t->buffer; - t->buffer = rbuf->buffer; - rbuf->buffer = ta; /* swap buffers */ - } - /* the first installation always gets installed as the destination - * buffer. subsequent installations get stacked up to allow for - * multi-way XOR */ - if (!pssPtr->rbuf) { - pssPtr->rbuf = t; - t->count = 1; - } else - pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ - - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if - * G=2 */ - -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - return (retcode); -} - -int -rf_MultiWayReconXor(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this - * parity stripe */ -{ - int i, numBufs = pssPtr->xorBufCount; - int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); - RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; - RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - - RF_ASSERT(pssPtr->rbuf != NULL); - RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); -#ifdef _KERNEL -#ifndef __NetBSD__ -#ifndef __FreeBSD__ - thread_block(); /* yield the processor before doing a big XOR */ -#endif -#endif -#endif /* _KERNEL */ - /* - * XXX - * - * What if more than 9 bufs? - */ - nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); - - /* release all the reconstruction buffers except the last one, which - * belongs to the disk whose submission caused this XOR to take place */ - for (i = 0; i < numBufs - 1; i++) { - if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) - rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); - else - if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) - rf_FreeReconBuffer(rbufs[i]); - else - RF_ASSERT(0); - } - targetRbuf->count += pssPtr->xorBufCount; - pssPtr->xorBufCount = 0; - return (0); -} -/* removes one full buffer from one of the full-buffer lists and returns it. - * - * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. - */ -RF_ReconBuffer_t * -rf_GetFullReconBuffer(reconCtrlPtr) - RF_ReconCtrl_t *reconCtrlPtr; -{ - RF_ReconBuffer_t *p; - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - if ((p = reconCtrlPtr->priorityList) != NULL) { - reconCtrlPtr->priorityList = p->next; - p->next = NULL; - goto out; - } - if ((p = reconCtrlPtr->fullBufferList) != NULL) { - reconCtrlPtr->fullBufferList = p->next; - p->next = NULL; - goto out; - } -out: - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - return (p); -} - - -/* if the reconstruction buffer is full, move it to the full list, - * which is maintained sorted by failed disk sector offset - * - * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */ -int -rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) - RF_Raid_t *raidPtr; - RF_ReconCtrl_t *reconCtrl; - RF_ReconParityStripeStatus_t *pssPtr; - int numDataCol; -{ - RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - - if (rbuf->count == numDataCol) { - raidPtr->numFullReconBuffers++; - Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { - Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - rbuf->next = reconCtrl->fullBufferList; - reconCtrl->fullBufferList = rbuf; - } else { - for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); - rbuf->next = p; - pt->next = rbuf; - Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - } -#if 0 - pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like - * to be able to find - * this rbuf while it's - * awaiting write */ -#else - rbuf->pssPtr = pssPtr; -#endif - pssPtr->rbuf = NULL; - rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); - } - return (0); -} - - -/* release a floating recon buffer for someone else to use. - * assumes the rb_mutex is LOCKED at entry - */ -void -rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconBuffer_t *rbuf; -{ - RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; - RF_CallbackDesc_t *cb; - - Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", - (long) rbuf->parityStripeID, rbuf->which_ru); - - /* if anyone is waiting on buffers, wake one of them up. They will - * subsequently wake up anyone else waiting on their RU */ - if (rcPtr->bufferWaitList) { - rbuf->next = rcPtr->committedRbufs; - rcPtr->committedRbufs = rbuf; - cb = rcPtr->bufferWaitList; - rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've - * committed a buffer */ - rf_FreeCallbackDesc(cb); - raidPtr->procsInBufWait--; - } else { - rbuf->next = rcPtr->floatingRbufs; - rcPtr->floatingRbufs = rbuf; - } -} -/* release any disk that is waiting on a buffer for the indicated RU. - * assumes the rb_mutex is LOCKED at entry - */ -void -rf_ReleaseBufferWaiters(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; -{ - RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; - - Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", - (long) pssPtr->parityStripeID, pssPtr->which_ru); - pssPtr->flags &= ~RF_PSS_BUFFERWAIT; - while (cb) { - cb1 = cb->next; - cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't - * committed a buffer */ - rf_FreeCallbackDesc(cb); - cb = cb1; - } - pssPtr->bufWaitList = NULL; -} -/* when reconstruction is forced on an RU, there may be some disks waiting to - * acquire a buffer for that RU. Since we allocate a new buffer as part of - * the forced-reconstruction process, we no longer have to wait for any - * buffers, so we wakeup any waiter that we find in the bufferWaitList - * - * assumes the rb_mutex is LOCKED at entry - */ -void -rf_ReleaseBufferWaiter(rcPtr, rbuf) - RF_ReconCtrl_t *rcPtr; - RF_ReconBuffer_t *rbuf; -{ - RF_CallbackDesc_t *cb, *cbt; - - for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) { - if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) { - Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); - if (cbt) - cbt->next = cb->next; - else - rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no - * committed buffer */ - rf_FreeCallbackDesc(cb); - return; - } - } -} diff --git a/sys/dev/raidframe/rf_reconbuffer.h b/sys/dev/raidframe/rf_reconbuffer.h deleted file mode 100644 index 1a5407e..0000000 --- a/sys/dev/raidframe/rf_reconbuffer.h +++ /dev/null @@ -1,63 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconbuffer.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************* - * - * rf_reconbuffer.h -- header file for reconstruction buffer manager - * - *******************************************************************/ - -#ifndef _RF__RF_RECONBUFFER_H_ -#define _RF__RF_RECONBUFFER_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_reconstruct.h> - -int -rf_SubmitReconBuffer(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); -int -rf_SubmitReconBufferBasic(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); -int -rf_MultiWayReconXor(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr); -RF_ReconBuffer_t *rf_GetFullReconBuffer(RF_ReconCtrl_t * reconCtrlPtr); -int -rf_CheckForFullRbuf(RF_Raid_t * raidPtr, RF_ReconCtrl_t * reconCtrl, - RF_ReconParityStripeStatus_t * pssPtr, int numDataCol); -void -rf_ReleaseFloatingReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_ReconBuffer_t * rbuf); -void -rf_ReleaseBufferWaiters(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr); -void rf_ReleaseBufferWaiter(RF_ReconCtrl_t * rcPtr, RF_ReconBuffer_t * rbuf); - -#endif /* !_RF__RF_RECONBUFFER_H_ */ diff --git a/sys/dev/raidframe/rf_reconmap.c b/sys/dev/raidframe/rf_reconmap.c deleted file mode 100644 index 261d339..0000000 --- a/sys/dev/raidframe/rf_reconmap.c +++ /dev/null @@ -1,396 +0,0 @@ -/* $NetBSD: rf_reconmap.c,v 1.6 1999/08/14 21:44:24 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************************* - * rf_reconmap.c - * - * code to maintain a map of what sectors have/have not been reconstructed - * - *************************************************************************/ - -#include <dev/raidframe/rf_raid.h> -#include <sys/time.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_utils.h> - -/* special pointer values indicating that a reconstruction unit - * has been either totally reconstructed or not at all. Both - * are illegal pointer values, so you have to be careful not to - * dereference through them. RU_NOTHING must be zero, since - * MakeReconMap uses bzero to initialize the structure. These are used - * only at the head of the list. - */ -#define RU_ALL ((RF_ReconMapListElem_t *) -1) -#define RU_NOTHING ((RF_ReconMapListElem_t *) 0) - -/* used to mark the end of the list */ -#define RU_NIL ((RF_ReconMapListElem_t *) 0) - - -static void -compact_stat_entry(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - int i); -static void crunch_list(RF_ReconMap_t * mapPtr, RF_ReconMapListElem_t * listPtr); -static RF_ReconMapListElem_t * -MakeReconMapListElem(RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, RF_ReconMapListElem_t * next); -static void -FreeReconMapListElem(RF_ReconMap_t * mapPtr, - RF_ReconMapListElem_t * p); -static void update_size(RF_ReconMap_t * mapPtr, int size); -static void PrintList(RF_ReconMapListElem_t * listPtr); - -/*----------------------------------------------------------------------------- - * - * Creates and initializes new Reconstruction map - * - *-----------------------------------------------------------------------------*/ - -RF_ReconMap_t * -rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk) - RF_Raid_t *raidPtr; - RF_SectorCount_t ru_sectors; /* size of reconstruction unit in - * sectors */ - RF_SectorCount_t disk_sectors; /* size of disk in sectors */ - RF_ReconUnitCount_t spareUnitsPerDisk; /* zero unless distributed - * sparing */ -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t num_rus = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerRU; - RF_ReconMap_t *p; - int rc; - - RF_Malloc(p, sizeof(RF_ReconMap_t), (RF_ReconMap_t *)); - p->sectorsPerReconUnit = ru_sectors; - p->sectorsInDisk = disk_sectors; - - p->totalRUs = num_rus; - p->spareRUs = spareUnitsPerDisk; - p->unitsLeft = num_rus - spareUnitsPerDisk; - - RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **)); - RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL); - - (void) bzero((char *) p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); - - p->size = sizeof(RF_ReconMap_t) + num_rus * sizeof(RF_ReconMapListElem_t *); - p->maxSize = p->size; - - rc = rf_mutex_init(&p->mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_Free(p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); - RF_Free(p, sizeof(RF_ReconMap_t)); - return (NULL); - } - return (p); -} - - -/*----------------------------------------------------------------------------- - * - * marks a new set of sectors as reconstructed. All the possible mergings get - * complicated. To simplify matters, the approach I take is to just dump - * something into the list, and then clean it up (i.e. merge elements and - * eliminate redundant ones) in a second pass over the list (compact_stat_entry()). - * Not 100% efficient, since a structure can be allocated and then immediately - * freed, but it keeps this code from becoming (more of) a nightmare of - * special cases. The only thing that compact_stat_entry() assumes is that the - * list is sorted by startSector, and so this is the only condition I maintain - * here. (MCH) - * - *-----------------------------------------------------------------------------*/ - -void -rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; - RF_SectorNum_t stopSector; -{ - RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; - RF_SectorNum_t i, first_in_RU, last_in_RU; - RF_ReconMapListElem_t *p, *pt; - - RF_LOCK_MUTEX(mapPtr->mutex); - RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk && stopSector >= startSector); - - while (startSector <= stopSector) { - i = startSector / mapPtr->sectorsPerReconUnit; - first_in_RU = i * sectorsPerReconUnit; - last_in_RU = first_in_RU + sectorsPerReconUnit - 1; - p = mapPtr->status[i]; - if (p != RU_ALL) { - if (p == RU_NOTHING || p->startSector > startSector) { /* insert at front of - * list */ - - mapPtr->status[i] = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), (p == RU_NOTHING) ? NULL : p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); - - } else {/* general case */ - do { /* search for place to insert */ - pt = p; - p = p->next; - } while (p && (p->startSector < startSector)); - pt->next = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); - } - compact_stat_entry(raidPtr, mapPtr, i); - } - startSector = RF_MIN(stopSector, last_in_RU) + 1; - } - RF_UNLOCK_MUTEX(mapPtr->mutex); -} - - - -/*----------------------------------------------------------------------------- - * - * performs whatever list compactions can be done, and frees any space - * that is no longer necessary. Assumes only that the list is sorted - * by startSector. crunch_list() compacts a single list as much as possible, - * and the second block of code deletes the entire list if possible. - * crunch_list() is also called from MakeReconMapAccessList(). - * - * When a recon unit is detected to be fully reconstructed, we set the - * corresponding bit in the parity stripe map so that the head follow - * code will not select this parity stripe again. This is redundant (but - * harmless) when compact_stat_entry is called from the reconstruction code, - * but necessary when called from the user-write code. - * - *-----------------------------------------------------------------------------*/ - -static void -compact_stat_entry(raidPtr, mapPtr, i) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - int i; -{ - RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; - RF_ReconMapListElem_t *p = mapPtr->status[i]; - - crunch_list(mapPtr, p); - - if ((p->startSector == i * sectorsPerReconUnit) && - (p->stopSector == i * sectorsPerReconUnit + sectorsPerReconUnit - 1)) { - mapPtr->status[i] = RU_ALL; - mapPtr->unitsLeft--; - FreeReconMapListElem(mapPtr, p); - } -} - -static void -crunch_list(mapPtr, listPtr) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *listPtr; -{ - RF_ReconMapListElem_t *pt, *p = listPtr; - - if (!p) - return; - pt = p; - p = p->next; - while (p) { - if (pt->stopSector >= p->startSector - 1) { - pt->stopSector = RF_MAX(pt->stopSector, p->stopSector); - pt->next = p->next; - FreeReconMapListElem(mapPtr, p); - p = pt->next; - } else { - pt = p; - p = p->next; - } - } -} -/*----------------------------------------------------------------------------- - * - * Allocate and fill a new list element - * - *-----------------------------------------------------------------------------*/ - -static RF_ReconMapListElem_t * -MakeReconMapListElem( - RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, - RF_ReconMapListElem_t * next) -{ - RF_ReconMapListElem_t *p; - - RF_Malloc(p, sizeof(RF_ReconMapListElem_t), (RF_ReconMapListElem_t *)); - if (p == NULL) - return (NULL); - p->startSector = startSector; - p->stopSector = stopSector; - p->next = next; - return (p); -} -/*----------------------------------------------------------------------------- - * - * Free a list element - * - *-----------------------------------------------------------------------------*/ - -static void -FreeReconMapListElem(mapPtr, p) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *p; -{ - int delta; - - if (mapPtr) { - delta = 0 - (int) sizeof(RF_ReconMapListElem_t); - update_size(mapPtr, delta); - } - RF_Free(p, sizeof(*p)); -} -/*----------------------------------------------------------------------------- - * - * Free an entire status structure. Inefficient, but can be called at any time. - * - *-----------------------------------------------------------------------------*/ -void -rf_FreeReconMap(mapPtr) - RF_ReconMap_t *mapPtr; -{ - RF_ReconMapListElem_t *p, *q; - RF_ReconUnitCount_t numRUs; - RF_ReconUnitNum_t i; - - numRUs = mapPtr->sectorsInDisk / mapPtr->sectorsPerReconUnit; - if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) - numRUs++; - - for (i = 0; i < numRUs; i++) { - p = mapPtr->status[i]; - while (p != RU_NOTHING && p != RU_ALL) { - q = p; - p = p->next; - RF_Free(q, sizeof(*q)); - } - } - rf_mutex_destroy(&mapPtr->mutex); - RF_Free(mapPtr->status, mapPtr->totalRUs * sizeof(RF_ReconMapListElem_t *)); - RF_Free(mapPtr, sizeof(RF_ReconMap_t)); -} -/*----------------------------------------------------------------------------- - * - * returns nonzero if the indicated RU has been reconstructed already - * - *---------------------------------------------------------------------------*/ - -int -rf_CheckRUReconstructed(mapPtr, startSector) - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; -{ - RF_ReconMapListElem_t *l; /* used for searching */ - RF_ReconUnitNum_t i; - - i = startSector / mapPtr->sectorsPerReconUnit; - l = mapPtr->status[i]; - return ((l == RU_ALL) ? 1 : 0); -} - -RF_ReconUnitCount_t -rf_UnitsLeftToReconstruct(mapPtr) - RF_ReconMap_t *mapPtr; -{ - RF_ASSERT(mapPtr != NULL); - return (mapPtr->unitsLeft); -} -/* updates the size fields of a status descriptor */ -static void -update_size(mapPtr, size) - RF_ReconMap_t *mapPtr; - int size; -{ - mapPtr->size += size; - mapPtr->maxSize = RF_MAX(mapPtr->size, mapPtr->maxSize); -} - -static void -PrintList(listPtr) - RF_ReconMapListElem_t *listPtr; -{ - while (listPtr) { - printf("%d,%d -> ", (int) listPtr->startSector, (int) listPtr->stopSector); - listPtr = listPtr->next; - } - printf("\n"); -} - -void -rf_PrintReconMap(raidPtr, mapPtr, frow, fcol) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; -{ - RF_ReconUnitCount_t numRUs; - RF_ReconMapListElem_t *p; - RF_ReconUnitNum_t i; - - numRUs = mapPtr->totalRUs; - if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) - numRUs++; - - for (i = 0; i < numRUs; i++) { - p = mapPtr->status[i]; - if (p == RU_ALL)/* printf("[%d] ALL\n",i) */ - ; - else - if (p == RU_NOTHING) { - printf("%d: Unreconstructed\n", i); - } else { - printf("%d: ", i); - PrintList(p); - } - } -} - -void -rf_PrintReconSchedule(mapPtr, starttime) - RF_ReconMap_t *mapPtr; - struct timeval *starttime; -{ - static int old_pctg = -1; - struct timeval tv, diff; - int new_pctg; - - new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); - if (new_pctg != old_pctg) { - RF_GETTIME(tv); - RF_TIMEVAL_DIFF(starttime, &tv, &diff); - printf("%d %d.%06d\n", (int) new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); - old_pctg = new_pctg; - } -} diff --git a/sys/dev/raidframe/rf_reconmap.h b/sys/dev/raidframe/rf_reconmap.h deleted file mode 100644 index 2fee059..0000000 --- a/sys/dev/raidframe/rf_reconmap.h +++ /dev/null @@ -1,86 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconmap.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/****************************************************************************** - * rf_reconMap.h -- Header file describing reconstruction status data structure - ******************************************************************************/ - -#ifndef _RF__RF_RECONMAP_H_ -#define _RF__RF_RECONMAP_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -/* - * Main reconstruction status descriptor. size and maxsize are used for - * monitoring only: they have no function for reconstruction. - */ -struct RF_ReconMap_s { - RF_SectorCount_t sectorsPerReconUnit; /* sectors per reconstruct - * unit */ - RF_SectorCount_t sectorsInDisk; /* total sectors in disk */ - RF_SectorCount_t unitsLeft; /* recon units left to recon */ - RF_ReconUnitCount_t totalRUs; /* total recon units on disk */ - RF_ReconUnitCount_t spareRUs; /* total number of spare RUs on failed - * disk */ - RF_StripeCount_t totalParityStripes; /* total number of parity - * stripes in array */ - u_int size; /* overall size of this structure */ - u_int maxSize; /* maximum size so far */ - RF_ReconMapListElem_t **status; /* array of ptrs to list elements */ - RF_DECLARE_MUTEX(mutex) -}; -/* a list element */ -struct RF_ReconMapListElem_s { - RF_SectorNum_t startSector; /* bounding sect nums on this block */ - RF_SectorNum_t stopSector; - RF_ReconMapListElem_t *next; /* next element in list */ -}; - -RF_ReconMap_t * -rf_MakeReconMap(RF_Raid_t * raidPtr, RF_SectorCount_t ru_sectors, - RF_SectorCount_t disk_sectors, RF_ReconUnitCount_t spareUnitsPerDisk); - -void -rf_ReconMapUpdate(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - RF_SectorNum_t startSector, RF_SectorNum_t stopSector); - -void rf_FreeReconMap(RF_ReconMap_t * mapPtr); - -int rf_CheckRUReconstructed(RF_ReconMap_t * mapPtr, RF_SectorNum_t startSector); - -RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(RF_ReconMap_t * mapPtr); - -void -rf_PrintReconMap(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - RF_RowCol_t frow, RF_RowCol_t fcol); - -void rf_PrintReconSchedule(RF_ReconMap_t * mapPtr, struct timeval * starttime); - -#endif /* !_RF__RF_RECONMAP_H_ */ diff --git a/sys/dev/raidframe/rf_reconstruct.c b/sys/dev/raidframe/rf_reconstruct.c deleted file mode 100644 index e24d440..0000000 --- a/sys/dev/raidframe/rf_reconstruct.c +++ /dev/null @@ -1,1682 +0,0 @@ -/* $NetBSD: rf_reconstruct.c,v 1.27 2001/01/26 02:16:24 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************ - * - * rf_reconstruct.c -- code to perform on-line reconstruction - * - ************************************************************/ - -#include <dev/raidframe/rf_types.h> -#include <sys/time.h> -#if defined(__FreeBSD__) -#include <sys/systm.h> -#if __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#endif -#include <sys/buf.h> -#include <sys/errno.h> - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#if defined(__NetBSD__) -#include <sys/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioccom.h> -#endif -#include <sys/fcntl.h> -#include <sys/vnode.h> - - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_reconutil.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_acctrace.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_shutdown.h> - -#include <dev/raidframe/rf_kintf.h> - -/* setting these to -1 causes them to be set to their default values if not set by debug options */ - -#define Dprintf(s) if (rf_reconDebug) rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#define Dprintf6(s,a,b,c,d,e,f) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL) -#define Dprintf7(s,a,b,c,d,e,f,g) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) - -#define DDprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define DDprintf2(s,a,b) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) - -static RF_FreeList_t *rf_recond_freelist; -#define RF_MAX_FREE_RECOND 4 -#define RF_RECOND_INC 1 - -static RF_RaidReconDesc_t * -AllocRaidReconDesc(RF_Raid_t * raidPtr, - RF_RowCol_t row, RF_RowCol_t col, RF_RaidDisk_t * spareDiskPtr, - int numDisksDone, RF_RowCol_t srow, RF_RowCol_t scol); -static void FreeReconDesc(RF_RaidReconDesc_t * reconDesc); -static int -ProcessReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t frow, - RF_ReconEvent_t * event); -static int -IssueNextReadRequest(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); -static int TryToRead(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col); -static int -ComputePSDiskOffsets(RF_Raid_t * raidPtr, RF_StripeNum_t psid, - RF_RowCol_t row, RF_RowCol_t col, RF_SectorNum_t * outDiskOffset, - RF_SectorNum_t * outFailedDiskSectorOffset, RF_RowCol_t * spRow, - RF_RowCol_t * spCol, RF_SectorNum_t * spOffset); -static int IssueNextWriteRequest(RF_Raid_t * raidPtr, RF_RowCol_t row); -static int ReconReadDoneProc(void *arg, int status); -static int ReconWriteDoneProc(void *arg, int status); -static void -CheckForNewMinHeadSep(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_HeadSepLimit_t hsCtr); -static int -CheckHeadSeparation(RF_Raid_t * raidPtr, RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, RF_RowCol_t col, RF_HeadSepLimit_t hsCtr, - RF_ReconUnitNum_t which_ru); -static int -CheckForcedOrBlockedReconstruction(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr, RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, RF_RowCol_t col, RF_StripeNum_t psid, - RF_ReconUnitNum_t which_ru); -static void ForceReconReadDoneProc(void *arg, int status); - -static void rf_ShutdownReconstruction(void *); - -struct RF_ReconDoneProc_s { - void (*proc) (RF_Raid_t *, void *); - void *arg; - RF_ReconDoneProc_t *next; -}; - -static RF_FreeList_t *rf_rdp_freelist; -#define RF_MAX_FREE_RDP 4 -#define RF_RDP_INC 1 - -static void -SignalReconDone(RF_Raid_t * raidPtr) -{ - RF_ReconDoneProc_t *p; - - RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); - for (p = raidPtr->recon_done_procs; p; p = p->next) { - p->proc(raidPtr, p->arg); - } - RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); -} - -int -rf_RegisterReconDoneProc( - RF_Raid_t * raidPtr, - void (*proc) (RF_Raid_t *, void *), - void *arg, - RF_ReconDoneProc_t ** handlep) -{ - RF_ReconDoneProc_t *p; - - RF_FREELIST_GET(rf_rdp_freelist, p, next, (RF_ReconDoneProc_t *)); - if (p == NULL) - return (ENOMEM); - p->proc = proc; - p->arg = arg; - RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); - p->next = raidPtr->recon_done_procs; - raidPtr->recon_done_procs = p; - RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); - if (handlep) - *handlep = p; - return (0); -} -/************************************************************************** - * - * sets up the parameters that will be used by the reconstruction process - * currently there are none, except for those that the layout-specific - * configuration (e.g. rf_ConfigureDeclustered) routine sets up. - * - * in the kernel, we fire off the recon thread. - * - **************************************************************************/ -static void -rf_ShutdownReconstruction(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); - RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *)); -} - -int -rf_ConfigureReconstruction(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND, - RF_RECOND_INC, sizeof(RF_RaidReconDesc_t)); - if (rf_recond_freelist == NULL) - return (ENOMEM); - RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP, - RF_RDP_INC, sizeof(RF_ReconDoneProc_t)); - if (rf_rdp_freelist == NULL) { - RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); - return (ENOMEM); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownReconstruction(NULL); - return (rc); - } - return (0); -} - -static RF_RaidReconDesc_t * -AllocRaidReconDesc(raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - RF_RaidDisk_t *spareDiskPtr; - int numDisksDone; - RF_RowCol_t srow; - RF_RowCol_t scol; -{ - - RF_RaidReconDesc_t *reconDesc; - - RF_FREELIST_GET(rf_recond_freelist, reconDesc, next, (RF_RaidReconDesc_t *)); - - reconDesc->raidPtr = raidPtr; - reconDesc->row = row; - reconDesc->col = col; - reconDesc->spareDiskPtr = spareDiskPtr; - reconDesc->numDisksDone = numDisksDone; - reconDesc->srow = srow; - reconDesc->scol = scol; - reconDesc->state = 0; - reconDesc->next = NULL; - - return (reconDesc); -} - -static void -FreeReconDesc(reconDesc) - RF_RaidReconDesc_t *reconDesc; -{ -#if RF_RECON_STATS > 0 - printf("RAIDframe: %lu recon event waits, %lu recon delays\n", - (long) reconDesc->numReconEventWaits, (long) reconDesc->numReconExecDelays); -#endif /* RF_RECON_STATS > 0 */ - printf("RAIDframe: %lu max exec ticks\n", - (long) reconDesc->maxReconExecTicks); -#if (RF_RECON_STATS > 0) || defined(KERNEL) - printf("\n"); -#endif /* (RF_RECON_STATS > 0) || KERNEL */ - RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next); -} - - -/***************************************************************************** - * - * primary routine to reconstruct a failed disk. This should be called from - * within its own thread. It won't return until reconstruction completes, - * fails, or is aborted. - *****************************************************************************/ -int -rf_ReconstructFailedDisk(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_LayoutSW_t *lp; - int rc; - - lp = raidPtr->Layout.map; - if (lp->SubmitReconBuffer) { - /* - * The current infrastructure only supports reconstructing one - * disk at a time for each array. - */ - RF_LOCK_MUTEX(raidPtr->mutex); - while (raidPtr->reconInProgress) { - RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex); - } - raidPtr->reconInProgress++; - RF_UNLOCK_MUTEX(raidPtr->mutex); - rc = rf_ReconstructFailedDiskBasic(raidPtr, row, col); - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - } else { - RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", - lp->parityConfig); - rc = EIO; - } - RF_SIGNAL_COND(raidPtr->waitForReconCond); - wakeup(&raidPtr->waitForReconCond); /* XXX Methinks this will be - * needed at some point... GO */ - return (rc); -} - -int -rf_ReconstructFailedDiskBasic(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_ComponentLabel_t *c_label; - RF_RaidDisk_t *spareDiskPtr = NULL; - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t srow, scol; - int numDisksDone = 0, rc; - - RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *)); - if (c_label == NULL) { - printf("rf_ReconstructInPlace: Out of memory?\n"); - return (ENOMEM); - } - - /* first look for a spare drive onto which to reconstruct the data */ - /* spare disk descriptors are stored in row 0. This may have to - * change eventually */ - - RF_LOCK_MUTEX(raidPtr->mutex); - RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); - - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - if (raidPtr->status[row] != rf_rs_degraded) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because status not degraded\n", row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - srow = row; - scol = (-1); - } else { - srow = 0; - for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->numSpare; scol++) { - if (raidPtr->Disks[srow][scol].status == rf_ds_spare) { - spareDiskPtr = &raidPtr->Disks[srow][scol]; - spareDiskPtr->status = rf_ds_used_spare; - break; - } - } - if (!spareDiskPtr) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because no spares are available\n", row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (ENOSPC); - } - printf("RECON: initiating reconstruction on row %d col %d -> spare at row %d col %d\n", row, col, srow, scol); - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - - reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol); - raidPtr->reconDesc = (void *) reconDesc; -#if RF_RECON_STATS > 0 - reconDesc->hsStallCount = 0; - reconDesc->numReconExecDelays = 0; - reconDesc->numReconEventWaits = 0; -#endif /* RF_RECON_STATS > 0 */ - reconDesc->reconExecTimerRunning = 0; - reconDesc->reconExecTicks = 0; - reconDesc->maxReconExecTicks = 0; - rc = rf_ContinueReconstructFailedDisk(reconDesc); - - if (!rc) { - /* fix up the component label */ - /* Don't actually need the read here.. */ - raidread_component_label( - raidPtr->raid_cinfo[srow][scol].ci_dev, - raidPtr->raid_cinfo[srow][scol].ci_vp, - c_label); - - raid_init_component_label( raidPtr, c_label); - c_label->row = row; - c_label->column = col; - c_label->clean = RF_RAID_DIRTY; - c_label->status = rf_ds_optimal; - c_label->partitionSize = raidPtr->Disks[srow][scol].partitionSize; - - /* We've just done a rebuild based on all the other - disks, so at this point the parity is known to be - clean, even if it wasn't before. */ - - /* XXX doesn't hold for RAID 6!! */ - - raidPtr->parity_good = RF_RAID_CLEAN; - - /* XXXX MORE NEEDED HERE */ - - raidwrite_component_label( - raidPtr->raid_cinfo[srow][scol].ci_dev, - raidPtr->raid_cinfo[srow][scol].ci_vp, - c_label); - - } - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (rc); -} - -/* - - Allow reconstructing a disk in-place -- i.e. component /dev/sd2e goes AWOL, - and you don't get a spare until the next Monday. With this function - (and hot-swappable drives) you can now put your new disk containing - /dev/sd2e on the bus, scsictl it alive, and then use raidctl(8) to - rebuild the data "on the spot". - -*/ - -int -rf_ReconstructInPlace(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_RaidDisk_t *spareDiskPtr = NULL; - RF_RaidReconDesc_t *reconDesc; - RF_LayoutSW_t *lp; - RF_RaidDisk_t *badDisk; - RF_ComponentLabel_t *c_label; - int numDisksDone = 0, rc; - struct vnode *vp; - int retcode; - int ac; - - RF_Malloc(c_label, sizeof(RF_ComponentLabel_t), (RF_ComponentLabel_t *)); - if (c_label == NULL) { - printf("rf_ReconstructInPlace: Out of memory?\n"); - return (ENOMEM); - } - - lp = raidPtr->Layout.map; - if (lp->SubmitReconBuffer) { - /* - * The current infrastructure only supports reconstructing one - * disk at a time for each array. - */ - RF_LOCK_MUTEX(raidPtr->mutex); - if ((raidPtr->Disks[row][col].status == rf_ds_optimal) && - (raidPtr->numFailures > 0)) { - /* XXX 0 above shouldn't be constant!!! */ - /* some component other than this has failed. - Let's not make things worse than they already - are... */ - printf("RAIDFRAME: Unable to reconstruct to disk at:\n"); - printf(" Row: %d Col: %d Too many failures.\n", - row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - if (raidPtr->Disks[row][col].status == rf_ds_reconstructing) { - printf("RAIDFRAME: Unable to reconstruct to disk at:\n"); - printf(" Row: %d Col: %d Reconstruction already occuring!\n", row, col); - - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - - - if (raidPtr->Disks[row][col].status != rf_ds_failed) { - /* "It's gone..." */ - raidPtr->numFailures++; - raidPtr->Disks[row][col].status = rf_ds_failed; - raidPtr->status[row] = rf_rs_degraded; - rf_update_component_labels(raidPtr, - RF_NORMAL_COMPONENT_UPDATE); - } - - while (raidPtr->reconInProgress) { - RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex); - } - - raidPtr->reconInProgress++; - - - /* first look for a spare drive onto which to reconstruct - the data. spare disk descriptors are stored in row 0. - This may have to change eventually */ - - /* Actually, we don't care if it's failed or not... - On a RAID set with correct parity, this function - should be callable on any component without ill affects. */ - /* RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); - */ - - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_ERRORMSG2("Unable to reconstruct to disk at row %d col %d: operation not supported for RF_DISTRIBUTE_SPARE\n", row, col); - - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (EINVAL); - } - - /* XXX need goop here to see if the disk is alive, - and, if not, make it so... */ - - - - badDisk = &raidPtr->Disks[row][col]; - - /* This device may have been opened successfully the - first time. Close it before trying to open it again.. */ - - if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) { - printf("Closed the open device: %s\n", - raidPtr->Disks[row][col].devname); - vp = raidPtr->raid_cinfo[row][col].ci_vp; - ac = raidPtr->Disks[row][col].auto_configured; - rf_close_component(raidPtr, vp, ac); - raidPtr->raid_cinfo[row][col].ci_vp = NULL; - } - /* note that this disk was *not* auto_configured (any longer)*/ - raidPtr->Disks[row][col].auto_configured = 0; - - printf("About to (re-)open the device for rebuilding: %s\n", - raidPtr->Disks[row][col].devname); - - retcode = raid_getcomponentsize(raidPtr, row, col); - - if (retcode) { - printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n", - raidPtr->raidid, raidPtr->Disks[row][col].devname, - retcode); - - /* XXX the component isn't responding properly... - must be still dead :-( */ - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return(retcode); - - } - - spareDiskPtr = &raidPtr->Disks[row][col]; - spareDiskPtr->status = rf_ds_used_spare; - - printf("RECON: initiating in-place reconstruction on\n"); - printf(" row %d col %d -> spare at row %d col %d\n", - row, col, row, col); - - RF_UNLOCK_MUTEX(raidPtr->mutex); - - reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col, - spareDiskPtr, numDisksDone, - row, col); - raidPtr->reconDesc = (void *) reconDesc; -#if RF_RECON_STATS > 0 - reconDesc->hsStallCount = 0; - reconDesc->numReconExecDelays = 0; - reconDesc->numReconEventWaits = 0; -#endif /* RF_RECON_STATS > 0 */ - reconDesc->reconExecTimerRunning = 0; - reconDesc->reconExecTicks = 0; - reconDesc->maxReconExecTicks = 0; - rc = rf_ContinueReconstructFailedDisk(reconDesc); - - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - - } else { - RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", - lp->parityConfig); - rc = EIO; - } - RF_LOCK_MUTEX(raidPtr->mutex); - - if (!rc) { - /* Need to set these here, as at this point it'll be claiming - that the disk is in rf_ds_spared! But we know better :-) */ - - raidPtr->Disks[row][col].status = rf_ds_optimal; - raidPtr->status[row] = rf_rs_optimal; - - /* fix up the component label */ - /* Don't actually need the read here.. */ - raidread_component_label(raidPtr->raid_cinfo[row][col].ci_dev, - raidPtr->raid_cinfo[row][col].ci_vp, - c_label); - - raid_init_component_label(raidPtr, c_label); - - c_label->row = row; - c_label->column = col; - - /* We've just done a rebuild based on all the other - disks, so at this point the parity is known to be - clean, even if it wasn't before. */ - - /* XXX doesn't hold for RAID 6!! */ - - raidPtr->parity_good = RF_RAID_CLEAN; - - raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev, - raidPtr->raid_cinfo[row][col].ci_vp, - c_label); - - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_SIGNAL_COND(raidPtr->waitForReconCond); - wakeup(&raidPtr->waitForReconCond); - RF_Free(c_label, sizeof(RF_ComponentLabel_t)); - return (rc); -} - - -int -rf_ContinueReconstructFailedDisk(reconDesc) - RF_RaidReconDesc_t *reconDesc; -{ - RF_Raid_t *raidPtr = reconDesc->raidPtr; - RF_RowCol_t row = reconDesc->row; - RF_RowCol_t col = reconDesc->col; - RF_RowCol_t srow = reconDesc->srow; - RF_RowCol_t scol = reconDesc->scol; - RF_ReconMap_t *mapPtr; - - RF_ReconEvent_t *event; - struct timeval etime, elpsd; - unsigned long xor_s, xor_resid_us; - int retcode, i, ds; - - switch (reconDesc->state) { - - - case 0: - - raidPtr->accumXorTimeUs = 0; - - /* create one trace record per physical disk */ - RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - - /* quiesce the array prior to starting recon. this is needed - * to assure no nasty interactions with pending user writes. - * We need to do this before we change the disk or row status. */ - reconDesc->state = 1; - - Dprintf("RECON: begin request suspend\n"); - retcode = rf_SuspendNewRequestsAndWait(raidPtr); - Dprintf("RECON: end request suspend\n"); - rf_StartUserStats(raidPtr); /* zero out the stats kept on - * user accs */ - - /* fall through to state 1 */ - - case 1: - - RF_LOCK_MUTEX(raidPtr->mutex); - - /* create the reconstruction control pointer and install it in - * the right slot */ - raidPtr->reconControl[row] = rf_MakeReconControl(reconDesc, row, col, srow, scol); - mapPtr = raidPtr->reconControl[row]->reconMap; - raidPtr->status[row] = rf_rs_reconstructing; - raidPtr->Disks[row][col].status = rf_ds_reconstructing; - raidPtr->Disks[row][col].spareRow = srow; - raidPtr->Disks[row][col].spareCol = scol; - - RF_UNLOCK_MUTEX(raidPtr->mutex); - - RF_GETTIME(raidPtr->reconControl[row]->starttime); - - /* now start up the actual reconstruction: issue a read for - * each surviving disk */ - - reconDesc->numDisksDone = 0; - for (i = 0; i < raidPtr->numCol; i++) { - if (i != col) { - /* find and issue the next I/O on the - * indicated disk */ - if (IssueNextReadRequest(raidPtr, row, i)) { - Dprintf2("RECON: done issuing for r%d c%d\n", row, i); - reconDesc->numDisksDone++; - } - } - } - - case 2: - Dprintf("RECON: resume requests\n"); - rf_ResumeNewRequests(raidPtr); - - - reconDesc->state = 3; - - case 3: - - /* process reconstruction events until all disks report that - * they've completed all work */ - mapPtr = raidPtr->reconControl[row]->reconMap; - - - - while (reconDesc->numDisksDone < raidPtr->numCol - 1) { - - event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); - RF_ASSERT(event); - - if (ProcessReconEvent(raidPtr, row, event)) - reconDesc->numDisksDone++; - raidPtr->reconControl[row]->numRUsTotal = - mapPtr->totalRUs; - raidPtr->reconControl[row]->numRUsComplete = - mapPtr->totalRUs - - rf_UnitsLeftToReconstruct(mapPtr); - - raidPtr->reconControl[row]->percentComplete = - (raidPtr->reconControl[row]->numRUsComplete * 100 / raidPtr->reconControl[row]->numRUsTotal); - if (rf_prReconSched) { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); - } - } - - - - reconDesc->state = 4; - - - case 4: - mapPtr = raidPtr->reconControl[row]->reconMap; - if (rf_reconDebug) { - printf("RECON: all reads completed\n"); - } - /* at this point all the reads have completed. We now wait - * for any pending writes to complete, and then we're done */ - - while (rf_UnitsLeftToReconstruct(raidPtr->reconControl[row]->reconMap) > 0) { - - event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); - RF_ASSERT(event); - - (void) ProcessReconEvent(raidPtr, row, event); /* ignore return code */ - raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); - if (rf_prReconSched) { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); - } - } - reconDesc->state = 5; - - case 5: - /* Success: mark the dead disk as reconstructed. We quiesce - * the array here to assure no nasty interactions with pending - * user accesses when we free up the psstatus structure as - * part of FreeReconControl() */ - - reconDesc->state = 6; - - retcode = rf_SuspendNewRequestsAndWait(raidPtr); - rf_StopUserStats(raidPtr); - rf_PrintUserStats(raidPtr); /* print out the stats on user - * accs accumulated during - * recon */ - - /* fall through to state 6 */ - case 6: - - - - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures--; - ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE); - raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared; - raidPtr->status[row] = (ds) ? rf_rs_reconfigured : rf_rs_optimal; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_GETTIME(etime); - RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), &etime, &elpsd); - - /* XXX -- why is state 7 different from state 6 if there is no - * return() here? -- XXX Note that I set elpsd above & use it - * below, so if you put a return here you'll have to fix this. - * (also, FreeReconControl is called below) */ - - case 7: - - rf_ResumeNewRequests(raidPtr); - - printf("Reconstruction of disk at row %d col %d completed\n", - row, col); - xor_s = raidPtr->accumXorTimeUs / 1000000; - xor_resid_us = raidPtr->accumXorTimeUs % 1000000; - printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n", - (int) elpsd.tv_sec, (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s, xor_resid_us); - printf(" (start time %d sec %d usec, end time %d sec %d usec)\n", - (int) raidPtr->reconControl[row]->starttime.tv_sec, - (int) raidPtr->reconControl[row]->starttime.tv_usec, - (int) etime.tv_sec, (int) etime.tv_usec); - -#if RF_RECON_STATS > 0 - printf("Total head-sep stall count was %d\n", - (int) reconDesc->hsStallCount); -#endif /* RF_RECON_STATS > 0 */ - rf_FreeReconControl(raidPtr, row); - RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t)); - FreeReconDesc(reconDesc); - - } - - SignalReconDone(raidPtr); - return (0); -} -/***************************************************************************** - * do the right thing upon each reconstruction event. - * returns nonzero if and only if there is nothing left unread on the - * indicated disk - *****************************************************************************/ -static int -ProcessReconEvent(raidPtr, frow, event) - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_ReconEvent_t *event; -{ - int retcode = 0, submitblocked; - RF_ReconBuffer_t *rbuf; - RF_SectorCount_t sectorsPerRU; - - Dprintf1("RECON: ProcessReconEvent type %d\n", event->type); - switch (event->type) { - - /* a read I/O has completed */ - case RF_REVENT_READDONE: - rbuf = raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf; - Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld\n", - frow, event->col, rbuf->parityStripeID); - Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, - rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0); - Dprintf1("RECON: submitblocked=%d\n", submitblocked); - if (!submitblocked) - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - /* a write I/O has completed */ - case RF_REVENT_WRITEDONE: - if (rf_floatingRbufDebug) { - rf_CheckFloatingRbufCount(raidPtr, 1); - } - sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; - rbuf = (RF_ReconBuffer_t *) event->arg; - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d (%d %% complete)\n", - rbuf->parityStripeID, rbuf->which_ru, raidPtr->reconControl[frow]->percentComplete); - rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]->reconMap, - rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU - 1); - rf_RemoveFromActiveReconTable(raidPtr, frow, rbuf->parityStripeID, rbuf->which_ru); - - if (rbuf->type == RF_RBUF_TYPE_FLOATING) { - RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - raidPtr->numFullReconBuffers--; - rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf); - RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - } else - if (rbuf->type == RF_RBUF_TYPE_FORCED) - rf_FreeReconBuffer(rbuf); - else - RF_ASSERT(0); - break; - - case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been - * cleared */ - Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d\n", frow, event->col); - submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, (int) (long) event->arg); - RF_ASSERT(!submitblocked); /* we wouldn't have gotten the - * BUFCLEAR event if we - * couldn't submit */ - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - case RF_REVENT_BLOCKCLEAR: /* A user-write reconstruction - * blockage has been cleared */ - DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d\n", frow, event->col); - retcode = TryToRead(raidPtr, frow, event->col); - break; - - case RF_REVENT_HEADSEPCLEAR: /* A max-head-separation - * reconstruction blockage has been - * cleared */ - Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d\n", frow, event->col); - retcode = TryToRead(raidPtr, frow, event->col); - break; - - /* a buffer has become ready to write */ - case RF_REVENT_BUFREADY: - Dprintf2("RECON: BUFREADY EVENT: row %d col %d\n", frow, event->col); - retcode = IssueNextWriteRequest(raidPtr, frow); - if (rf_floatingRbufDebug) { - rf_CheckFloatingRbufCount(raidPtr, 1); - } - break; - - /* we need to skip the current RU entirely because it got - * recon'd while we were waiting for something else to happen */ - case RF_REVENT_SKIP: - DDprintf2("RECON: SKIP EVENT: row %d col %d\n", frow, event->col); - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - /* a forced-reconstruction read access has completed. Just - * submit the buffer */ - case RF_REVENT_FORCEDREADDONE: - rbuf = (RF_ReconBuffer_t *) event->arg; - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d\n", frow, event->col); - submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0); - RF_ASSERT(!submitblocked); - break; - - default: - RF_PANIC(); - } - rf_FreeReconEventDesc(event); - return (retcode); -} -/***************************************************************************** - * - * find the next thing that's needed on the indicated disk, and issue - * a read request for it. We assume that the reconstruction buffer - * associated with this process is free to receive the data. If - * reconstruction is blocked on the indicated RU, we issue a - * blockage-release request instead of a physical disk read request. - * If the current disk gets too far ahead of the others, we issue a - * head-separation wait request and return. - * - * ctrl->{ru_count, curPSID, diskOffset} and - * rbuf->failedDiskSectorOffset are maintained to point to the unit - * we're currently accessing. Note that this deviates from the - * standard C idiom of having counters point to the next thing to be - * accessed. This allows us to easily retry when we're blocked by - * head separation or reconstruction-blockage events. - * - * returns nonzero if and only if there is nothing left unread on the - * indicated disk - * - *****************************************************************************/ -static int -IssueNextReadRequest(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconBuffer_t *rbuf = ctrl->rbuf; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; - int do_new_check = 0, retcode = 0, status; - - /* if we are currently the slowest disk, mark that we have to do a new - * check */ - if (ctrl->headSepCounter <= raidPtr->reconControl[row]->minHeadSepCounter) - do_new_check = 1; - - while (1) { - - ctrl->ru_count++; - if (ctrl->ru_count < RUsPerPU) { - ctrl->diskOffset += sectorsPerRU; - rbuf->failedDiskSectorOffset += sectorsPerRU; - } else { - ctrl->curPSID++; - ctrl->ru_count = 0; - /* code left over from when head-sep was based on - * parity stripe id */ - if (ctrl->curPSID >= raidPtr->reconControl[row]->lastPSID) { - CheckForNewMinHeadSep(raidPtr, row, ++(ctrl->headSepCounter)); - return (1); /* finito! */ - } - /* find the disk offsets of the start of the parity - * stripe on both the current disk and the failed - * disk. skip this entire parity stripe if either disk - * does not appear in the indicated PS */ - status = ComputePSDiskOffsets(raidPtr, ctrl->curPSID, row, col, &ctrl->diskOffset, &rbuf->failedDiskSectorOffset, - &rbuf->spRow, &rbuf->spCol, &rbuf->spOffset); - if (status) { - ctrl->ru_count = RUsPerPU - 1; - continue; - } - } - rbuf->which_ru = ctrl->ru_count; - - /* skip this RU if it's already been reconstructed */ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, rbuf->failedDiskSectorOffset)) { - Dprintf2("Skipping psid %ld ru %d: already reconstructed\n", ctrl->curPSID, ctrl->ru_count); - continue; - } - break; - } - ctrl->headSepCounter++; - if (do_new_check) - CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); /* update min if needed */ - - - /* at this point, we have definitely decided what to do, and we have - * only to see if we can actually do it now */ - rbuf->parityStripeID = ctrl->curPSID; - rbuf->which_ru = ctrl->ru_count; - bzero((char *) &raidPtr->recon_tracerecs[col], sizeof(raidPtr->recon_tracerecs[col])); - raidPtr->recon_tracerecs[col].reconacc = 1; - RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); - retcode = TryToRead(raidPtr, row, col); - return (retcode); -} - -/* - * tries to issue the next read on the indicated disk. We may be - * blocked by (a) the heads being too far apart, or (b) recon on the - * indicated RU being blocked due to a write by a user thread. In - * this case, we issue a head-sep or blockage wait request, which will - * cause this same routine to be invoked again later when the blockage - * has cleared. - */ - -static int -TryToRead(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; -{ - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; - RF_StripeNum_t psid = ctrl->curPSID; - RF_ReconUnitNum_t which_ru = ctrl->ru_count; - RF_DiskQueueData_t *req; - int status, created = 0; - RF_ReconParityStripeStatus_t *pssPtr; - - /* if the current disk is too far ahead of the others, issue a - * head-separation wait and return */ - if (CheckHeadSeparation(raidPtr, ctrl, row, col, ctrl->headSepCounter, which_ru)) - return (0); - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE, &created); - - /* if recon is blocked on the indicated parity stripe, issue a - * block-wait request and return. this also must mark the indicated RU - * in the stripe as under reconstruction if not blocked. */ - status = CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl, row, col, psid, which_ru); - if (status == RF_PSS_RECON_BLOCKED) { - Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked\n", psid, which_ru); - goto out; - } else - if (status == RF_PSS_FORCED_ON_WRITE) { - rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); - goto out; - } - /* make one last check to be sure that the indicated RU didn't get - * reconstructed while we were waiting for something else to happen. - * This is unfortunate in that it causes us to make this check twice - * in the normal case. Might want to make some attempt to re-work - * this so that we only do this check if we've definitely blocked on - * one of the above checks. When this condition is detected, we may - * have just created a bogus status entry, which we need to delete. */ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, ctrl->rbuf->failedDiskSectorOffset)) { - Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after stall\n", psid, which_ru); - if (created) - rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); - rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); - goto out; - } - /* found something to read. issue the I/O */ - Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld buf %lx\n", - psid, row, col, ctrl->diskOffset, ctrl->rbuf->buffer); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer); - raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us = - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); - - /* should be ok to use a NULL proc pointer here, all the bufs we use - * should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset, sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru, - ReconReadDoneProc, (void *) ctrl, NULL, &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- XXX */ - - ctrl->rbuf->arg = (void *) req; - rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY); - pssPtr->issued[col] = 1; - -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (0); -} - - -/* - * given a parity stripe ID, we want to find out whether both the - * current disk and the failed disk exist in that parity stripe. If - * not, we want to skip this whole PS. If so, we want to find the - * disk offset of the start of the PS on both the current disk and the - * failed disk. - * - * this works by getting a list of disks comprising the indicated - * parity stripe, and searching the list for the current and failed - * disks. Once we've decided they both exist in the parity stripe, we - * need to decide whether each is data or parity, so that we'll know - * which mapping function to call to get the corresponding disk - * offsets. - * - * this is kind of unpleasant, but doing it this way allows the - * reconstruction code to use parity stripe IDs rather than physical - * disks address to march through the failed disk, which greatly - * simplifies a lot of code, as well as eliminating the need for a - * reverse-mapping function. I also think it will execute faster, - * since the calls to the mapping module are kept to a minimum. - * - * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING - * THE STRIPE IN THE CORRECT ORDER */ - - -static int -ComputePSDiskOffsets( - RF_Raid_t * raidPtr, /* raid descriptor */ - RF_StripeNum_t psid, /* parity stripe identifier */ - RF_RowCol_t row, /* row and column of disk to find the offsets - * for */ - RF_RowCol_t col, - RF_SectorNum_t * outDiskOffset, - RF_SectorNum_t * outFailedDiskSectorOffset, - RF_RowCol_t * spRow, /* OUT: row,col of spare unit for failed unit */ - RF_RowCol_t * spCol, - RF_SectorNum_t * spOffset) -{ /* OUT: offset into disk containing spare unit */ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; - RF_RaidAddr_t sosRaidAddress; /* start-of-stripe */ - RF_RowCol_t *diskids; - u_int i, j, k, i_offset, j_offset; - RF_RowCol_t prow, pcol; - int testcol, testrow; - RF_RowCol_t stripe; - RF_SectorNum_t poffset; - char i_is_parity = 0, j_is_parity = 0; - RF_RowCol_t stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - - /* get a listing of the disks comprising that stripe */ - sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid); - (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids, &stripe); - RF_ASSERT(diskids); - - /* reject this entire parity stripe if it does not contain the - * indicated disk or it does not contain the failed disk */ - if (row != stripe) - goto skipit; - for (i = 0; i < stripeWidth; i++) { - if (col == diskids[i]) - break; - } - if (i == stripeWidth) - goto skipit; - for (j = 0; j < stripeWidth; j++) { - if (fcol == diskids[j]) - break; - } - if (j == stripeWidth) { - goto skipit; - } - /* find out which disk the parity is on */ - (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol, &poffset, RF_DONT_REMAP); - - /* find out if either the current RU or the failed RU is parity */ - /* also, if the parity occurs in this stripe prior to the data and/or - * failed col, we need to decrement i and/or j */ - for (k = 0; k < stripeWidth; k++) - if (diskids[k] == pcol) - break; - RF_ASSERT(k < stripeWidth); - i_offset = i; - j_offset = j; - if (k < i) - i_offset--; - else - if (k == i) { - i_is_parity = 1; - i_offset = 0; - } /* set offsets to zero to disable multiply - * below */ - if (k < j) - j_offset--; - else - if (k == j) { - j_is_parity = 1; - j_offset = 0; - } - /* at this point, [ij]_is_parity tells us whether the [current,failed] - * disk is parity at the start of this RU, and, if data, "[ij]_offset" - * tells us how far into the stripe the [current,failed] disk is. */ - - /* call the mapping routine to get the offset into the current disk, - * repeat for failed disk. */ - if (i_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); - - RF_ASSERT(row == testrow && col == testcol); - - if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); - RF_ASSERT(row == testrow && fcol == testcol); - - /* now locate the spare unit for the failed unit */ - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { - if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); - } else { - *spRow = raidPtr->reconControl[row]->spareRow; - *spCol = raidPtr->reconControl[row]->spareCol; - *spOffset = *outFailedDiskSectorOffset; - } - - return (0); - -skipit: - Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d\n", - psid, row, col); - return (1); -} -/* this is called when a buffer has become ready to write to the replacement disk */ -static int -IssueNextWriteRequest(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; - RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; - RF_ReconBuffer_t *rbuf; - RF_DiskQueueData_t *req; - - rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]); - RF_ASSERT(rbuf); /* there must be one available, or we wouldn't - * have gotten the event that sent us here */ - RF_ASSERT(rbuf->pssPtr); - - rbuf->pssPtr->writeRbuf = rbuf; - rbuf->pssPtr = NULL; - - Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d (failed disk offset %ld) buf %lx\n", - rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID, - rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer); - Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, - rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); - - /* should be ok to use a NULL b_proc here b/c all addrs should be in - * kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset, - sectorsPerRU, rbuf->buffer, - rbuf->parityStripeID, rbuf->which_ru, - ReconWriteDoneProc, (void *) rbuf, NULL, - &raidPtr->recon_tracerecs[fcol], - (void *) raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- XXX */ - - rbuf->arg = (void *) req; - rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, RF_IO_RECON_PRIORITY); - - return (0); -} - -/* - * this gets called upon the completion of a reconstruction read - * operation the arg is a pointer to the per-disk reconstruction - * control structure for the process that just finished a read. - * - * called at interrupt context in the kernel, so don't do anything - * illegal here. - */ -static int -ReconReadDoneProc(arg, status) - void *arg; - int status; -{ - RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg; - RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr; - - if (status) { - /* - * XXX - */ - printf("Recon read failed!\n"); - RF_PANIC(); - } - RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - raidPtr->recon_tracerecs[ctrl->col].specific.recon.recon_fetch_to_return_us = - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - - rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, RF_REVENT_READDONE); - return (0); -} -/* this gets called upon the completion of a reconstruction write operation. - * the arg is a pointer to the rbuf that was just written - * - * called at interrupt context in the kernel, so don't do anything illegal here. - */ -static int -ReconWriteDoneProc(arg, status) - void *arg; - int status; -{ - RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg; - - Dprintf2("Reconstruction completed on psid %ld ru %d\n", rbuf->parityStripeID, rbuf->which_ru); - if (status) { - printf("Recon write failed!\n"); /* fprintf(stderr,"Recon - * write failed!\n"); */ - RF_PANIC(); - } - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, arg, RF_REVENT_WRITEDONE); - return (0); -} - - -/* - * computes a new minimum head sep, and wakes up anyone who needs to - * be woken as a result - */ -static void -CheckForNewMinHeadSep(raidPtr, row, hsCtr) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_HeadSepLimit_t hsCtr; -{ - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_HeadSepLimit_t new_min; - RF_RowCol_t i; - RF_CallbackDesc_t *p; - RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter); /* from the definition - * of a minimum */ - - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - new_min = ~(1L << (8 * sizeof(long) - 1)); /* 0x7FFF....FFF */ - for (i = 0; i < raidPtr->numCol; i++) - if (i != reconCtrlPtr->fcol) { - if (reconCtrlPtr->perDiskInfo[i].headSepCounter < new_min) - new_min = reconCtrlPtr->perDiskInfo[i].headSepCounter; - } - /* set the new minimum and wake up anyone who can now run again */ - if (new_min != reconCtrlPtr->minHeadSepCounter) { - reconCtrlPtr->minHeadSepCounter = new_min; - Dprintf1("RECON: new min head pos counter val is %ld\n", new_min); - while (reconCtrlPtr->headSepCBList) { - if (reconCtrlPtr->headSepCBList->callbackArg.v > new_min) - break; - p = reconCtrlPtr->headSepCBList; - reconCtrlPtr->headSepCBList = p->next; - p->next = NULL; - rf_CauseReconEvent(raidPtr, p->row, p->col, NULL, RF_REVENT_HEADSEPCLEAR); - rf_FreeCallbackDesc(p); - } - - } - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); -} - -/* - * checks to see that the maximum head separation will not be violated - * if we initiate a reconstruction I/O on the indicated disk. - * Limiting the maximum head separation between two disks eliminates - * the nasty buffer-stall conditions that occur when one disk races - * ahead of the others and consumes all of the floating recon buffers. - * This code is complex and unpleasant but it's necessary to avoid - * some very nasty, albeit fairly rare, reconstruction behavior. - * - * returns non-zero if and only if we have to stop working on the - * indicated disk due to a head-separation delay. - */ -static int -CheckHeadSeparation( - RF_Raid_t * raidPtr, - RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_HeadSepLimit_t hsCtr, - RF_ReconUnitNum_t which_ru) -{ - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_CallbackDesc_t *cb, *p, *pt; - int retval = 0; - - /* if we're too far ahead of the slowest disk, stop working on this - * disk until the slower ones catch up. We do this by scheduling a - * wakeup callback for the time when the slowest disk has caught up. - * We define "caught up" with 20% hysteresis, i.e. the head separation - * must have fallen to at most 80% of the max allowable head - * separation before we'll wake up. - * - */ - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - if ((raidPtr->headSepLimit >= 0) && - ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) > raidPtr->headSepLimit)) { - Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr %ld minHSCtr %ld limit %ld\n", - raidPtr->raidid, row, col, ctrl->headSepCounter, - reconCtrlPtr->minHeadSepCounter, - raidPtr->headSepLimit); - cb = rf_AllocCallbackDesc(); - /* the minHeadSepCounter value we have to get to before we'll - * wake up. build in 20% hysteresis. */ - cb->callbackArg.v = (ctrl->headSepCounter - raidPtr->headSepLimit + raidPtr->headSepLimit / 5); - cb->row = row; - cb->col = col; - cb->next = NULL; - - /* insert this callback descriptor into the sorted list of - * pending head-sep callbacks */ - p = reconCtrlPtr->headSepCBList; - if (!p) - reconCtrlPtr->headSepCBList = cb; - else - if (cb->callbackArg.v < p->callbackArg.v) { - cb->next = reconCtrlPtr->headSepCBList; - reconCtrlPtr->headSepCBList = cb; - } else { - for (pt = p, p = p->next; p && (p->callbackArg.v < cb->callbackArg.v); pt = p, p = p->next); - cb->next = p; - pt->next = cb; - } - retval = 1; -#if RF_RECON_STATS > 0 - ctrl->reconCtrl->reconDesc->hsStallCount++; -#endif /* RF_RECON_STATS > 0 */ - } - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - - return (retval); -} -/* - * checks to see if reconstruction has been either forced or blocked - * by a user operation. if forced, we skip this RU entirely. else if - * blocked, put ourselves on the wait list. else return 0. - * - * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY - */ -static int -CheckForcedOrBlockedReconstruction( - RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr, - RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_StripeNum_t psid, - RF_ReconUnitNum_t which_ru) -{ - RF_CallbackDesc_t *cb; - int retcode = 0; - - if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) || (pssPtr->flags & RF_PSS_FORCED_ON_WRITE)) - retcode = RF_PSS_FORCED_ON_WRITE; - else - if (pssPtr->flags & RF_PSS_RECON_BLOCKED) { - Dprintf4("RECON: row %d col %d blocked at psid %ld ru %d\n", row, col, psid, which_ru); - cb = rf_AllocCallbackDesc(); /* append ourselves to - * the blockage-wait - * list */ - cb->row = row; - cb->col = col; - cb->next = pssPtr->blockWaitList; - pssPtr->blockWaitList = cb; - retcode = RF_PSS_RECON_BLOCKED; - } - if (!retcode) - pssPtr->flags |= RF_PSS_UNDER_RECON; /* mark this RU as under - * reconstruction */ - - return (retcode); -} -/* - * if reconstruction is currently ongoing for the indicated stripeID, - * reconstruction is forced to completion and we return non-zero to - * indicate that the caller must wait. If not, then reconstruction is - * blocked on the indicated stripe and the routine returns zero. If - * and only if we return non-zero, we'll cause the cbFunc to get - * invoked with the cbArg when the reconstruction has completed. - */ -int -rf_ForceOrBlockRecon(raidPtr, asmap, cbFunc, cbArg) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - void (*cbFunc) (RF_Raid_t *, void *); - void *cbArg; -{ - RF_RowCol_t row = asmap->physInfo->row; /* which row of the array - * we're working on */ - RF_StripeNum_t stripeID = asmap->stripeID; /* the stripe ID we're - * forcing recon on */ - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; /* num sects in one RU */ - RF_ReconParityStripeStatus_t *pssPtr; /* a pointer to the parity - * stripe status structure */ - RF_StripeNum_t psid; /* parity stripe id */ - RF_SectorNum_t offset, fd_offset; /* disk offset, failed-disk - * offset */ - RF_RowCol_t *diskids; - RF_RowCol_t stripe; - RF_ReconUnitNum_t which_ru; /* RU within parity stripe */ - RF_RowCol_t fcol, diskno, i; - RF_ReconBuffer_t *new_rbuf; /* ptr to newly allocated rbufs */ - RF_DiskQueueData_t *req;/* disk I/O req to be enqueued */ - RF_CallbackDesc_t *cb; - int created = 0, nPromoted; - - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); - - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created); - - /* if recon is not ongoing on this PS, just return */ - if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (0); - } - /* otherwise, we have to wait for reconstruction to complete on this - * RU. */ - /* In order to avoid waiting for a potentially large number of - * low-priority accesses to complete, we force a normal-priority (i.e. - * not low-priority) reconstruction on this RU. */ - if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) && !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) { - DDprintf1("Forcing recon on psid %ld\n", psid); - pssPtr->flags |= RF_PSS_FORCED_ON_WRITE; /* mark this RU as under - * forced recon */ - pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; /* clear the blockage - * that we just set */ - fcol = raidPtr->reconControl[row]->fcol; - - /* get a listing of the disks comprising the indicated stripe */ - (raidPtr->Layout.map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &stripe); - RF_ASSERT(row == stripe); - - /* For previously issued reads, elevate them to normal - * priority. If the I/O has already completed, it won't be - * found in the queue, and hence this will be a no-op. For - * unissued reads, allocate buffers and issue new reads. The - * fact that we've set the FORCED bit means that the regular - * recon procs will not re-issue these reqs */ - for (i = 0; i < raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; i++) - if ((diskno = diskids[i]) != fcol) { - if (pssPtr->issued[diskno]) { - nPromoted = rf_DiskIOPromote(&raidPtr->Queues[row][diskno], psid, which_ru); - if (rf_reconDebug && nPromoted) - printf("raid%d: promoted read from row %d col %d\n", raidPtr->raidid, row, diskno); - } else { - new_rbuf = rf_MakeReconBuffer(raidPtr, row, diskno, RF_RBUF_TYPE_FORCED); /* create new buf */ - ComputePSDiskOffsets(raidPtr, psid, row, diskno, &offset, &fd_offset, - &new_rbuf->spRow, &new_rbuf->spCol, &new_rbuf->spOffset); /* find offsets & spare - * location */ - new_rbuf->parityStripeID = psid; /* fill in the buffer */ - new_rbuf->which_ru = which_ru; - new_rbuf->failedDiskSectorOffset = fd_offset; - new_rbuf->priority = RF_IO_NORMAL_PRIORITY; - - /* use NULL b_proc b/c all addrs - * should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, offset + which_ru * sectorsPerRU, sectorsPerRU, new_rbuf->buffer, - psid, which_ru, (int (*) (void *, int)) ForceReconReadDoneProc, (void *) new_rbuf, NULL, - NULL, (void *) raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- - * XXX */ - - new_rbuf->arg = req; - rf_DiskIOEnqueue(&raidPtr->Queues[row][diskno], req, RF_IO_NORMAL_PRIORITY); /* enqueue the I/O */ - Dprintf3("raid%d: Issued new read req on row %d col %d\n", raidPtr->raidid, row, diskno); - } - } - /* if the write is sitting in the disk queue, elevate its - * priority */ - if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol], psid, which_ru)) - printf("raid%d: promoted write to row %d col %d\n", - raidPtr->raidid, row, fcol); - } - /* install a callback descriptor to be invoked when recon completes on - * this parity stripe. */ - cb = rf_AllocCallbackDesc(); - /* XXX the following is bogus.. These functions don't really match!! - * GO */ - cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc; - cb->callbackArg.p = (void *) cbArg; - cb->next = pssPtr->procWaitList; - pssPtr->procWaitList = cb; - DDprintf2("raid%d: Waiting for forced recon on psid %ld\n", - raidPtr->raidid, psid); - - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (1); -} -/* called upon the completion of a forced reconstruction read. - * all we do is schedule the FORCEDREADONE event. - * called at interrupt context in the kernel, so don't do anything illegal here. - */ -static void -ForceReconReadDoneProc(arg, status) - void *arg; - int status; -{ - RF_ReconBuffer_t *rbuf = arg; - - if (status) { - printf("Forced recon read failed!\n"); /* fprintf(stderr,"Forced - * recon read - * failed!\n"); */ - RF_PANIC(); - } - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREADDONE); -} -/* releases a block on the reconstruction of the indicated stripe */ -int -rf_UnblockRecon(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; -{ - RF_RowCol_t row = asmap->origRow; - RF_StripeNum_t stripeID = asmap->stripeID; - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psid; - int created = 0; - RF_CallbackDesc_t *cb; - - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_NONE, &created); - - /* When recon is forced, the pss desc can get deleted before we get - * back to unblock recon. But, this can _only_ happen when recon is - * forced. It would be good to put some kind of sanity check here, but - * how to decide if recon was just forced or not? */ - if (!pssPtr) { - /* printf("Warning: no pss descriptor upon unblock on psid %ld - * RU %d\n",psid,which_ru); */ - if (rf_reconDebug || rf_pssDebug) - printf("Warning: no pss descriptor upon unblock on psid %ld RU %d\n", (long) psid, which_ru); - goto out; - } - pssPtr->blockCount--; - Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d\n", - raidPtr->raidid, psid, pssPtr->blockCount); - if (pssPtr->blockCount == 0) { /* if recon blockage has been released */ - - /* unblock recon before calling CauseReconEvent in case - * CauseReconEvent causes us to try to issue a new read before - * returning here. */ - pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; - - - while (pssPtr->blockWaitList) { - /* spin through the block-wait list and - release all the waiters */ - cb = pssPtr->blockWaitList; - pssPtr->blockWaitList = cb->next; - cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL, RF_REVENT_BLOCKCLEAR); - rf_FreeCallbackDesc(cb); - } - if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { - /* if no recon was requested while recon was blocked */ - rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); - } - } -out: - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return (0); -} diff --git a/sys/dev/raidframe/rf_reconstruct.h b/sys/dev/raidframe/rf_reconstruct.h deleted file mode 100644 index 318d546..0000000 --- a/sys/dev/raidframe/rf_reconstruct.h +++ /dev/null @@ -1,202 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconstruct.h,v 1.5 2000/05/28 00:48:30 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/********************************************************* - * rf_reconstruct.h -- header file for reconstruction code - *********************************************************/ - -#ifndef _RF__RF_RECONSTRUCT_H_ -#define _RF__RF_RECONSTRUCT_H_ - -#include <dev/raidframe/rf_types.h> -#include <sys/time.h> -#include <dev/raidframe/rf_reconmap.h> -#include <dev/raidframe/rf_psstatus.h> - -/* reconstruction configuration information */ -struct RF_ReconConfig_s { - unsigned numFloatingReconBufs; /* number of floating recon bufs to - * use */ - RF_HeadSepLimit_t headSepLimit; /* how far apart the heads are allow - * to become, in parity stripes */ -}; -/* a reconstruction buffer */ -struct RF_ReconBuffer_s { - RF_Raid_t *raidPtr; /* void * to avoid recursive includes */ - caddr_t buffer; /* points to the data */ - RF_StripeNum_t parityStripeID; /* the parity stripe that this data - * relates to */ - int which_ru; /* which reconstruction unit within the PSS */ - RF_SectorNum_t failedDiskSectorOffset; /* the offset into the failed - * disk */ - RF_RowCol_t row, col; /* which disk this buffer belongs to or is - * targeted at */ - RF_StripeCount_t count; /* counts the # of SUs installed so far */ - int priority; /* used to force hi priority recon */ - RF_RbufType_t type; /* FORCED or FLOATING */ - char *arrived; /* [x] = 1/0 if SU from disk x has/hasn't - * arrived */ - RF_ReconBuffer_t *next; /* used for buffer management */ - void *arg; /* generic field for general use */ - RF_RowCol_t spRow, spCol; /* spare disk to which this buf should - * be written */ - /* if dist sparing off, always identifies the replacement disk */ - RF_SectorNum_t spOffset;/* offset into the spare disk */ - /* if dist sparing off, identical to failedDiskSectorOffset */ - RF_ReconParityStripeStatus_t *pssPtr; /* debug- pss associated with - * issue-pending write */ -}; -/* a reconstruction event descriptor. The event types currently are: - * RF_REVENT_READDONE -- a read operation has completed - * RF_REVENT_WRITEDONE -- a write operation has completed - * RF_REVENT_BUFREADY -- the buffer manager has produced a full buffer - * RF_REVENT_BLOCKCLEAR -- a reconstruction blockage has been cleared - * RF_REVENT_BUFCLEAR -- the buffer manager has released a process blocked on submission - * RF_REVENT_SKIP -- we need to skip the current RU and go on to the next one, typ. b/c we found recon forced - * RF_REVENT_FORCEDREADONE- a forced-reconstructoin read operation has completed - */ -typedef enum RF_Revent_e { - RF_REVENT_READDONE, - RF_REVENT_WRITEDONE, - RF_REVENT_BUFREADY, - RF_REVENT_BLOCKCLEAR, - RF_REVENT_BUFCLEAR, - RF_REVENT_HEADSEPCLEAR, - RF_REVENT_SKIP, - RF_REVENT_FORCEDREADDONE -} RF_Revent_t; - -struct RF_ReconEvent_s { - RF_Revent_t type; /* what kind of event has occurred */ - RF_RowCol_t col; /* row ID is implicit in the queue in which - * the event is placed */ - void *arg; /* a generic argument */ - RF_ReconEvent_t *next; -}; -/* - * Reconstruction control information maintained per-disk - * (for surviving disks) - */ -struct RF_PerDiskReconCtrl_s { - RF_ReconCtrl_t *reconCtrl; - RF_RowCol_t row, col; /* to make this structure self-identifying */ - RF_StripeNum_t curPSID; /* the next parity stripe ID to check on this - * disk */ - RF_HeadSepLimit_t headSepCounter; /* counter used to control - * maximum head separation */ - RF_SectorNum_t diskOffset; /* the offset into the indicated disk - * of the current PU */ - RF_ReconUnitNum_t ru_count; /* this counts off the recon units - * within each parity unit */ - RF_ReconBuffer_t *rbuf; /* the recon buffer assigned to this disk */ -}; -/* main reconstruction control structure */ -struct RF_ReconCtrl_s { - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t fcol; /* which column has failed */ - RF_PerDiskReconCtrl_t *perDiskInfo; /* information maintained - * per-disk */ - RF_ReconMap_t *reconMap;/* map of what has/has not been reconstructed */ - RF_RowCol_t spareRow; /* which of the spare disks we're using */ - RF_RowCol_t spareCol; - RF_StripeNum_t lastPSID;/* the ID of the last parity stripe we want - * reconstructed */ - int percentComplete;/* percentage completion of reconstruction */ - int numRUsComplete; /* number of Reconstruction Units done */ - int numRUsTotal; /* total number of Reconstruction Units */ - - /* reconstruction event queue */ - RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction - * events */ - RF_DECLARE_MUTEX(eq_mutex) /* mutex for locking event - * queue */ - RF_DECLARE_COND(eq_cond) /* condition variable for - * signalling recon events */ - int eq_count; /* debug only */ - - /* reconstruction buffer management */ - RF_DECLARE_MUTEX(rb_mutex) /* mutex for messing around - * with recon buffers */ - RF_ReconBuffer_t *floatingRbufs; /* available floating - * reconstruction buffers */ - RF_ReconBuffer_t *committedRbufs; /* recon buffers that have - * been committed to some - * waiting disk */ - RF_ReconBuffer_t *fullBufferList; /* full buffers waiting to be - * written out */ - RF_ReconBuffer_t *priorityList; /* full buffers that have been - * elevated to higher priority */ - RF_CallbackDesc_t *bufferWaitList; /* disks that are currently - * blocked waiting for buffers */ - - /* parity stripe status table */ - RF_PSStatusHeader_t *pssTable; /* stores the reconstruction status of - * active parity stripes */ - - /* maximum-head separation control */ - RF_HeadSepLimit_t minHeadSepCounter; /* the minimum hs counter over - * all disks */ - RF_CallbackDesc_t *headSepCBList; /* list of callbacks to be - * done as minPSID advances */ - - /* performance monitoring */ - struct timeval starttime; /* recon start time */ - - void (*continueFunc) (void *); /* function to call when io - * returns */ - void *continueArg; /* argument for Func */ -}; -/* the default priority for reconstruction accesses */ -#define RF_IO_RECON_PRIORITY RF_IO_LOW_PRIORITY - -int rf_ConfigureReconstruction(RF_ShutdownList_t ** listp); - -int -rf_ReconstructFailedDisk(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); - -int -rf_ReconstructFailedDiskBasic(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); - -int -rf_ReconstructInPlace(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col); - -int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t * reconDesc); - -int -rf_ForceOrBlockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - void (*cbFunc) (RF_Raid_t *, void *), void *cbArg); - - int rf_UnblockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); - - int rf_RegisterReconDoneProc(RF_Raid_t * raidPtr, void (*proc) (RF_Raid_t *, void *), void *arg, - RF_ReconDoneProc_t ** handlep); - -#endif /* !_RF__RF_RECONSTRUCT_H_ */ diff --git a/sys/dev/raidframe/rf_reconutil.c b/sys/dev/raidframe/rf_reconutil.c deleted file mode 100644 index bafff69..0000000 --- a/sys/dev/raidframe/rf_reconutil.c +++ /dev/null @@ -1,338 +0,0 @@ -/* $NetBSD: rf_reconutil.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************** - * rf_reconutil.c -- reconstruction utilities - ********************************************/ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_reconutil.h> -#include <dev/raidframe/rf_reconbuffer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_decluster.h> -#include <dev/raidframe/rf_raid5_rotatedspare.h> -#include <dev/raidframe/rf_interdecluster.h> -#include <dev/raidframe/rf_chaindecluster.h> - -/******************************************************************* - * allocates/frees the reconstruction control information structures - *******************************************************************/ -RF_ReconCtrl_t * -rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t frow; /* failed row and column */ - RF_RowCol_t fcol; - RF_RowCol_t srow; /* identifies which spare we're using */ - RF_RowCol_t scol; -{ - RF_Raid_t *raidPtr = reconDesc->raidPtr; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; - RF_ReconUnitCount_t numSpareRUs; - RF_ReconCtrl_t *reconCtrlPtr; - RF_ReconBuffer_t *rbuf; - RF_LayoutSW_t *lp; - int retcode, rc; - RF_RowCol_t i; - - lp = raidPtr->Layout.map; - - /* make and zero the global reconstruction structure and the per-disk - * structure */ - RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *)); - RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *)); /* this zeros it */ - reconCtrlPtr->reconDesc = reconDesc; - reconCtrlPtr->fcol = fcol; - reconCtrlPtr->spareRow = srow; - reconCtrlPtr->spareCol = scol; - reconCtrlPtr->lastPSID = layoutPtr->numStripe / layoutPtr->SUsPerPU; - reconCtrlPtr->percentComplete = 0; - - /* initialize each per-disk recon information structure */ - for (i = 0; i < raidPtr->numCol; i++) { - reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr; - reconCtrlPtr->perDiskInfo[i].row = frow; - reconCtrlPtr->perDiskInfo[i].col = i; - reconCtrlPtr->perDiskInfo[i].curPSID = -1; /* make it appear as if - * we just finished an - * RU */ - reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU - 1; - } - - /* Get the number of spare units per disk and the sparemap in case - * spare is distributed */ - - if (lp->GetNumSpareRUs) { - numSpareRUs = lp->GetNumSpareRUs(raidPtr); - } else { - numSpareRUs = 0; - } - - /* - * Not all distributed sparing archs need dynamic mappings - */ - if (lp->InstallSpareTable) { - retcode = rf_InstallSpareTable(raidPtr, frow, fcol); - if (retcode) { - RF_PANIC(); /* XXX fix this */ - } - } - /* make the reconstruction map */ - reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit), - raidPtr->sectorsPerDisk, numSpareRUs); - - /* make the per-disk reconstruction buffers */ - for (i = 0; i < raidPtr->numCol; i++) { - reconCtrlPtr->perDiskInfo[i].rbuf = (i == fcol) ? NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE); - } - - /* initialize the event queue */ - rc = rf_mutex_init(&reconCtrlPtr->eq_mutex, __FUNCTION__); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (NULL); - } - rc = rf_cond_init(&reconCtrlPtr->eq_cond); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (NULL); - } - reconCtrlPtr->eventQueue = NULL; - reconCtrlPtr->eq_count = 0; - - /* make the floating recon buffers and append them to the free list */ - rc = rf_mutex_init(&reconCtrlPtr->rb_mutex, __FUNCTION__); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return (NULL); - } - reconCtrlPtr->fullBufferList = NULL; - reconCtrlPtr->priorityList = NULL; - reconCtrlPtr->floatingRbufs = NULL; - reconCtrlPtr->committedRbufs = NULL; - for (i = 0; i < raidPtr->numFloatingReconBufs; i++) { - rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING); - rbuf->next = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = rbuf; - } - - /* create the parity stripe status table */ - reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr); - - /* set the initial min head sep counter val */ - reconCtrlPtr->minHeadSepCounter = 0; - - return (reconCtrlPtr); -} - -void -rf_FreeReconControl(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; -{ - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_ReconBuffer_t *t; - RF_ReconUnitNum_t i; - - RF_ASSERT(reconCtrlPtr); - for (i = 0; i < raidPtr->numCol; i++) - if (reconCtrlPtr->perDiskInfo[i].rbuf) - rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf); - for (i = 0; i < raidPtr->numFloatingReconBufs; i++) { - t = reconCtrlPtr->floatingRbufs; - RF_ASSERT(t); - reconCtrlPtr->floatingRbufs = t->next; - rf_FreeReconBuffer(t); - } - rf_mutex_destroy(&reconCtrlPtr->rb_mutex); - rf_mutex_destroy(&reconCtrlPtr->eq_mutex); - rf_cond_destroy(&reconCtrlPtr->eq_cond); - rf_FreeReconMap(reconCtrlPtr->reconMap); - rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable); - RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t)); - RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr)); -} - - -/****************************************************************************** - * computes the default head separation limit - *****************************************************************************/ -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimit(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_HeadSepLimit_t hsl; - RF_LayoutSW_t *lp; - - lp = raidPtr->Layout.map; - if (lp->GetDefaultHeadSepLimit == NULL) - return (-1); - hsl = lp->GetDefaultHeadSepLimit(raidPtr); - return (hsl); -} - - -/****************************************************************************** - * computes the default number of floating recon buffers - *****************************************************************************/ -int -rf_GetDefaultNumFloatingReconBuffers(raidPtr) - RF_Raid_t *raidPtr; -{ - RF_LayoutSW_t *lp; - int nrb; - - lp = raidPtr->Layout.map; - if (lp->GetDefaultNumFloatingReconBuffers == NULL) - return (3 * raidPtr->numCol); - nrb = lp->GetDefaultNumFloatingReconBuffers(raidPtr); - return (nrb); -} - - -/****************************************************************************** - * creates and initializes a reconstruction buffer - *****************************************************************************/ -RF_ReconBuffer_t * -rf_MakeReconBuffer( - RF_Raid_t * raidPtr, - RF_RowCol_t row, - RF_RowCol_t col, - RF_RbufType_t type) -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconBuffer_t *t; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit); - - RF_Malloc(t, sizeof(RF_ReconBuffer_t), (RF_ReconBuffer_t *)); - RF_Malloc(t->buffer, recon_buffer_size, (caddr_t)); - RF_Malloc(t->arrived, raidPtr->numCol * sizeof(char), (char *)); - t->raidPtr = raidPtr; - t->row = row; - t->col = col; - t->priority = RF_IO_RECON_PRIORITY; - t->type = type; - t->pssPtr = NULL; - t->next = NULL; - return (t); -} -/****************************************************************************** - * frees a reconstruction buffer - *****************************************************************************/ -void -rf_FreeReconBuffer(rbuf) - RF_ReconBuffer_t *rbuf; -{ - RF_Raid_t *raidPtr = rbuf->raidPtr; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit); - - RF_Free(rbuf->arrived, raidPtr->numCol * sizeof(char)); - RF_Free(rbuf->buffer, recon_buffer_size); - RF_Free(rbuf, sizeof(*rbuf)); -} - - -/****************************************************************************** - * debug only: sanity check the number of floating recon bufs in use - *****************************************************************************/ -void -rf_CheckFloatingRbufCount(raidPtr, dolock) - RF_Raid_t *raidPtr; - int dolock; -{ - RF_ReconParityStripeStatus_t *p; - RF_PSStatusHeader_t *pssTable; - RF_ReconBuffer_t *rbuf; - int i, j, sum = 0; - RF_RowCol_t frow = 0; - - for (i = 0; i < raidPtr->numRow; i++) - if (raidPtr->reconControl[i]) { - frow = i; - break; - } - RF_ASSERT(frow >= 0); - - if (dolock) - RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - pssTable = raidPtr->reconControl[frow]->pssTable; - - for (i = 0; i < raidPtr->pssTableSize; i++) { - RF_LOCK_MUTEX(pssTable[i].mutex); - for (p = pssTable[i].chain; p; p = p->next) { - rbuf = (RF_ReconBuffer_t *) p->rbuf; - if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - - rbuf = (RF_ReconBuffer_t *) p->writeRbuf; - if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - - for (j = 0; j < p->xorBufCount; j++) { - rbuf = (RF_ReconBuffer_t *) p->rbufsForXor[j]; - RF_ASSERT(rbuf); - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - } - RF_UNLOCK_MUTEX(pssTable[i].mutex); - } - - for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - - RF_ASSERT(sum == raidPtr->numFloatingReconBufs); - - if (dolock) - RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); -} diff --git a/sys/dev/raidframe/rf_reconutil.h b/sys/dev/raidframe/rf_reconutil.h deleted file mode 100644 index 744d7b9..0000000 --- a/sys/dev/raidframe/rf_reconutil.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_reconutil.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/************************************************************ - * rf_reconutil.h -- header file for reconstruction utilities - ************************************************************/ - -#ifndef _RF__RF_RECONUTIL_H_ -#define _RF__RF_RECONUTIL_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_reconstruct.h> - -RF_ReconCtrl_t * -rf_MakeReconControl(RF_RaidReconDesc_t * reconDesc, - RF_RowCol_t frow, RF_RowCol_t fcol, RF_RowCol_t srow, RF_RowCol_t scol); -void rf_FreeReconControl(RF_Raid_t * raidPtr, RF_RowCol_t row); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(RF_Raid_t * raidPtr); -int rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t * raidPtr); -RF_ReconBuffer_t * -rf_MakeReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col, RF_RbufType_t type); -void rf_FreeReconBuffer(RF_ReconBuffer_t * rbuf); -void rf_CheckFloatingRbufCount(RF_Raid_t * raidPtr, int dolock); - -#endif /* !_RF__RF_RECONUTIL_H_ */ diff --git a/sys/dev/raidframe/rf_revent.c b/sys/dev/raidframe/rf_revent.c deleted file mode 100644 index fcdf82e..0000000 --- a/sys/dev/raidframe/rf_revent.c +++ /dev/null @@ -1,230 +0,0 @@ -/* $NetBSD: rf_revent.c,v 1.9 2000/09/21 01:45:46 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * revent.c -- reconstruction event handling code - */ - -#include <sys/errno.h> - -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_revent.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_kintf.h> - -static RF_FreeList_t *rf_revent_freelist; -#define RF_MAX_FREE_REVENT 128 -#define RF_REVENT_INC 8 -#define RF_REVENT_INITIAL 8 - - - -#include <sys/proc.h> -#include <sys/kernel.h> - -#define DO_WAIT(_rc) \ - RF_LTSLEEP(&(_rc)->eventQueue, PRIBIO, "raidframe eventq", \ - 0, &((_rc)->eq_mutex)) - -#define DO_SIGNAL(_rc) wakeup(&(_rc)->eventQueue) - - -static void rf_ShutdownReconEvent(void *); - -static RF_ReconEvent_t * -GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); - -static void rf_ShutdownReconEvent(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_revent_freelist, next, (RF_ReconEvent_t *)); -} - -int -rf_ConfigureReconEvent(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - RF_FREELIST_CREATE(rf_revent_freelist, RF_MAX_FREE_REVENT, - RF_REVENT_INC, sizeof(RF_ReconEvent_t)); - if (rf_revent_freelist == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownReconEvent, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownReconEvent(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_revent_freelist, RF_REVENT_INITIAL, next, - (RF_ReconEvent_t *)); - return (0); -} - -/* returns the next reconstruction event, blocking the calling thread - * until one becomes available. will now return null if it is blocked - * or will return an event if it is not */ - -RF_ReconEvent_t * -rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t row; - void (*continueFunc) (void *); - void *continueArg; -{ - RF_Raid_t *raidPtr = reconDesc->raidPtr; - RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; - RF_ReconEvent_t *event; - - RF_ASSERT(row >= 0 && row <= raidPtr->numRow); - RF_LOCK_MUTEX(rctrl->eq_mutex); - /* q null and count==0 must be equivalent conditions */ - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); - - rctrl->continueFunc = continueFunc; - rctrl->continueArg = continueArg; - - - /* mpsleep timeout value: secs = timo_val/hz. 'ticks' here is - defined as cycle-counter ticks, not softclock ticks */ - -#define MAX_RECON_EXEC_USECS (100 * 1000) /* 100 ms */ -#define RECON_DELAY_MS 25 -#define RECON_TIMO ((RECON_DELAY_MS * hz) / 1000) - - /* we are not pre-emptible in the kernel, but we don't want to run - * forever. If we run w/o blocking for more than MAX_RECON_EXEC_TICKS - * ticks of the cycle counter, delay for RECON_DELAY before - * continuing. this may murder us with context switches, so we may - * need to increase both the MAX...TICKS and the RECON_DELAY_MS. */ - if (reconDesc->reconExecTimerRunning) { - int status; - - RF_ETIMER_STOP(reconDesc->recon_exec_timer); - RF_ETIMER_EVAL(reconDesc->recon_exec_timer); - reconDesc->reconExecTicks += - RF_ETIMER_VAL_US(reconDesc->recon_exec_timer); - if (reconDesc->reconExecTicks > reconDesc->maxReconExecTicks) - reconDesc->maxReconExecTicks = - reconDesc->reconExecTicks; - if (reconDesc->reconExecTicks >= MAX_RECON_EXEC_USECS) { - /* we've been running too long. delay for - * RECON_DELAY_MS */ -#if RF_RECON_STATS > 0 - reconDesc->numReconExecDelays++; -#endif /* RF_RECON_STATS > 0 */ - - status = RF_LTSLEEP(&reconDesc->reconExecTicks, PRIBIO, - "recon delay", RECON_TIMO, - &rctrl->eq_mutex); - RF_ASSERT(status == EWOULDBLOCK); - reconDesc->reconExecTicks = 0; - } - } - while (!rctrl->eventQueue) { -#if RF_RECON_STATS > 0 - reconDesc->numReconEventWaits++; -#endif /* RF_RECON_STATS > 0 */ - DO_WAIT(rctrl); - reconDesc->reconExecTicks = 0; /* we've just waited */ - } - - reconDesc->reconExecTimerRunning = 1; - if (RF_ETIMER_VAL_US(reconDesc->recon_exec_timer)!=0) { - /* it moved!! reset the timer. */ - RF_ETIMER_START(reconDesc->recon_exec_timer); - } - event = rctrl->eventQueue; - rctrl->eventQueue = event->next; - event->next = NULL; - rctrl->eq_count--; - - /* q null and count==0 must be equivalent conditions */ - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); - RF_UNLOCK_MUTEX(rctrl->eq_mutex); - return (event); -} -/* enqueues a reconstruction event on the indicated queue */ -void -rf_CauseReconEvent(raidPtr, row, col, arg, type) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; -{ - RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; - RF_ReconEvent_t *event = GetReconEventDesc(row, col, arg, type); - - if (type == RF_REVENT_BUFCLEAR) { - RF_ASSERT(col != rctrl->fcol); - } - RF_ASSERT(row >= 0 && row <= raidPtr->numRow && col >= 0 && col <= raidPtr->numCol); - RF_LOCK_MUTEX(rctrl->eq_mutex); - /* q null and count==0 must be equivalent conditions */ - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); - event->next = rctrl->eventQueue; - rctrl->eventQueue = event; - rctrl->eq_count++; - RF_UNLOCK_MUTEX(rctrl->eq_mutex); - - DO_SIGNAL(rctrl); -} -/* allocates and initializes a recon event descriptor */ -static RF_ReconEvent_t * -GetReconEventDesc(row, col, arg, type) - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; -{ - RF_ReconEvent_t *t; - - RF_FREELIST_GET(rf_revent_freelist, t, next, (RF_ReconEvent_t *)); - if (t == NULL) - return (NULL); - t->col = col; - t->arg = arg; - t->type = type; - return (t); -} - -void -rf_FreeReconEventDesc(event) - RF_ReconEvent_t *event; -{ - RF_FREELIST_FREE(rf_revent_freelist, event, next); -} diff --git a/sys/dev/raidframe/rf_revent.h b/sys/dev/raidframe/rf_revent.h deleted file mode 100644 index 51c3202..0000000 --- a/sys/dev/raidframe/rf_revent.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_revent.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************* - * - * rf_revent.h -- header file for reconstruction event handling code - * - *******************************************************************/ - -#ifndef _RF__RF_REVENT_H_ -#define _RF__RF_REVENT_H_ - -#include <dev/raidframe/rf_types.h> - -int rf_ConfigureReconEvent(RF_ShutdownList_t ** listp); - -RF_ReconEvent_t * -rf_GetNextReconEvent(RF_RaidReconDesc_t * reconDesc, - RF_RowCol_t row, void (*continueFunc) (void *), void *continueArg); - - void rf_CauseReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); - - void rf_FreeReconEventDesc(RF_ReconEvent_t * event); - -#endif /* !_RF__RF_REVENT_H_ */ diff --git a/sys/dev/raidframe/rf_shutdown.c b/sys/dev/raidframe/rf_shutdown.c deleted file mode 100644 index e6b5292..0000000 --- a/sys/dev/raidframe/rf_shutdown.c +++ /dev/null @@ -1,104 +0,0 @@ -/* $NetBSD: rf_shutdown.c,v 1.6 2000/01/13 23:41:18 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_shutdown.c - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * Maintain lists of cleanup functions. Also, mechanisms for coordinating - * thread startup and shutdown. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_shutdown.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_freelist.h> - -static void -rf_FreeShutdownEnt(RF_ShutdownList_t * ent) -{ - FREE(ent, M_RAIDFRAME); -} - -int -_rf_ShutdownCreate( - RF_ShutdownList_t ** listp, - void (*cleanup) (void *arg), - void *arg, - char *file, - int line) -{ - RF_ShutdownList_t *ent; - - /* - * Have to directly allocate memory here, since we start up before - * and shutdown after RAIDframe internal allocation system. - */ - /* ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), - M_RAIDFRAME, M_WAITOK); */ - ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), - M_RAIDFRAME, M_NOWAIT); - if (ent == NULL) - return (ENOMEM); - ent->cleanup = cleanup; - ent->arg = arg; - ent->file = file; - ent->line = line; - ent->next = *listp; - *listp = ent; - return (0); -} - -int -rf_ShutdownList(RF_ShutdownList_t ** list) -{ - RF_ShutdownList_t *r, *next; - char *file; - int line; - - for (r = *list; r; r = next) { - next = r->next; - file = r->file; - line = r->line; - - if (rf_shutdownDebug) { - printf("call shutdown, created %s:%d\n", file, line); - } - r->cleanup(r->arg); - - if (rf_shutdownDebug) { - printf("completed shutdown, created %s:%d\n", file, line); - } - rf_FreeShutdownEnt(r); - } - *list = NULL; - return (0); -} diff --git a/sys/dev/raidframe/rf_shutdown.h b/sys/dev/raidframe/rf_shutdown.h deleted file mode 100644 index 5abc5ba..0000000 --- a/sys/dev/raidframe/rf_shutdown.h +++ /dev/null @@ -1,67 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_shutdown.h,v 1.2 1999/02/05 00:06:17 oster Exp $ */ -/* - * rf_shutdown.h - */ -/* - * Copyright (c) 1996 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * Maintain lists of cleanup functions. Also, mechanisms for coordinating - * thread startup and shutdown. - */ - -#ifndef _RF__RF_SHUTDOWN_H_ -#define _RF__RF_SHUTDOWN_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> - -/* - * Important note: the shutdown list is run like a stack, new - * entries pushed on top. Therefore, the most recently added - * entry (last started) is the first removed (stopped). This - * should handle system-dependencies pretty nicely- if a system - * is there when you start another, it'll be there when you - * shut down another. Hopefully, this subsystem will remove - * more complexity than it introduces. - */ - -struct RF_ShutdownList_s { - void (*cleanup) (void *arg); - void *arg; - char *file; - int line; - RF_ShutdownList_t *next; -}; -#define rf_ShutdownCreate(_listp_,_func_,_arg_) \ - _rf_ShutdownCreate(_listp_,_func_,_arg_,__FILE__,__LINE__) - -int _rf_ShutdownCreate(RF_ShutdownList_t ** listp, void (*cleanup) (void *arg), - void *arg, char *file, int line); -int rf_ShutdownList(RF_ShutdownList_t ** listp); - -#endif /* !_RF__RF_SHUTDOWN_H_ */ diff --git a/sys/dev/raidframe/rf_sstf.c b/sys/dev/raidframe/rf_sstf.c deleted file mode 100644 index cd9ea56..0000000 --- a/sys/dev/raidframe/rf_sstf.c +++ /dev/null @@ -1,658 +0,0 @@ -/* $NetBSD: rf_sstf.c,v 1.6 2001/01/27 20:18:55 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/******************************************************************************* - * - * sstf.c -- prioritized shortest seek time first disk queueing code - * - ******************************************************************************/ - -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_layout.h> -#include <dev/raidframe/rf_diskqueue.h> -#include <dev/raidframe/rf_sstf.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_options.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_types.h> - -#define DIR_LEFT 1 -#define DIR_RIGHT 2 -#define DIR_EITHER 3 - -#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_))) - -#define QSUM(_sstfq_) (((_sstfq_)->lopri.qlen)+((_sstfq_)->left.qlen)+((_sstfq_)->right.qlen)) - - -static void -do_sstf_ord_q(RF_DiskQueueData_t **, - RF_DiskQueueData_t **, - RF_DiskQueueData_t *); - -static RF_DiskQueueData_t * -closest_to_arm(RF_SstfQ_t *, - RF_SectorNum_t, - int *, - int); -static void do_dequeue(RF_SstfQ_t *, RF_DiskQueueData_t *); - - -static void -do_sstf_ord_q(queuep, tailp, req) - RF_DiskQueueData_t **queuep; - RF_DiskQueueData_t **tailp; - RF_DiskQueueData_t *req; -{ - RF_DiskQueueData_t *r, *s; - - if (*queuep == NULL) { - *queuep = req; - *tailp = req; - req->next = NULL; - req->prev = NULL; - return; - } - if (req->sectorOffset <= (*queuep)->sectorOffset) { - req->next = *queuep; - req->prev = NULL; - (*queuep)->prev = req; - *queuep = req; - return; - } - if (req->sectorOffset > (*tailp)->sectorOffset) { - /* optimization */ - r = NULL; - s = *tailp; - goto q_at_end; - } - for (s = NULL, r = *queuep; r; s = r, r = r->next) { - if (r->sectorOffset >= req->sectorOffset) { - /* insert after s, before r */ - RF_ASSERT(s); - req->next = r; - r->prev = req; - s->next = req; - req->prev = s; - return; - } - } -q_at_end: - /* insert after s, at end of queue */ - RF_ASSERT(r == NULL); - RF_ASSERT(s); - RF_ASSERT(s == (*tailp)); - req->next = NULL; - req->prev = s; - s->next = req; - *tailp = req; -} -/* for removing from head-of-queue */ -#define DO_HEAD_DEQ(_r_,_q_) { \ - _r_ = (_q_)->queue; \ - RF_ASSERT((_r_) != NULL); \ - (_q_)->queue = (_r_)->next; \ - (_q_)->qlen--; \ - if ((_q_)->qlen == 0) { \ - RF_ASSERT((_r_) == (_q_)->qtail); \ - RF_ASSERT((_q_)->queue == NULL); \ - (_q_)->qtail = NULL; \ - } \ - else { \ - RF_ASSERT((_q_)->queue->prev == (_r_)); \ - (_q_)->queue->prev = NULL; \ - } \ -} - -/* for removing from end-of-queue */ -#define DO_TAIL_DEQ(_r_,_q_) { \ - _r_ = (_q_)->qtail; \ - RF_ASSERT((_r_) != NULL); \ - (_q_)->qtail = (_r_)->prev; \ - (_q_)->qlen--; \ - if ((_q_)->qlen == 0) { \ - RF_ASSERT((_r_) == (_q_)->queue); \ - RF_ASSERT((_q_)->qtail == NULL); \ - (_q_)->queue = NULL; \ - } \ - else { \ - RF_ASSERT((_q_)->qtail->next == (_r_)); \ - (_q_)->qtail->next = NULL; \ - } \ -} - -#define DO_BEST_DEQ(_l_,_r_,_q_) { \ - if (SNUM_DIFF((_q_)->queue->sectorOffset,_l_) \ - < SNUM_DIFF((_q_)->qtail->sectorOffset,_l_)) \ - { \ - DO_HEAD_DEQ(_r_,_q_); \ - } \ - else { \ - DO_TAIL_DEQ(_r_,_q_); \ - } \ -} - -static RF_DiskQueueData_t * -closest_to_arm(queue, arm_pos, dir, allow_reverse) - RF_SstfQ_t *queue; - RF_SectorNum_t arm_pos; - int *dir; - int allow_reverse; -{ - RF_SectorNum_t best_pos_l = 0, this_pos_l = 0, last_pos = 0; - RF_SectorNum_t best_pos_r = 0, this_pos_r = 0; - RF_DiskQueueData_t *r, *best_l, *best_r; - - best_r = best_l = NULL; - for (r = queue->queue; r; r = r->next) { - if (r->sectorOffset < arm_pos) { - if (best_l == NULL) { - best_l = r; - last_pos = best_pos_l = this_pos_l; - } else { - this_pos_l = arm_pos - r->sectorOffset; - if (this_pos_l < best_pos_l) { - best_l = r; - last_pos = best_pos_l = this_pos_l; - } else { - last_pos = this_pos_l; - } - } - } else { - if (best_r == NULL) { - best_r = r; - last_pos = best_pos_r = this_pos_r; - } else { - this_pos_r = r->sectorOffset - arm_pos; - if (this_pos_r < best_pos_r) { - best_r = r; - last_pos = best_pos_r = this_pos_r; - } else { - last_pos = this_pos_r; - } - if (this_pos_r > last_pos) { - /* getting farther away */ - break; - } - } - } - } - if ((best_r == NULL) && (best_l == NULL)) - return (NULL); - if ((*dir == DIR_RIGHT) && best_r) - return (best_r); - if ((*dir == DIR_LEFT) && best_l) - return (best_l); - if (*dir == DIR_EITHER) { - if (best_l == NULL) - return (best_r); - if (best_r == NULL) - return (best_l); - if (best_pos_r < best_pos_l) - return (best_r); - else - return (best_l); - } - /* - * Nothing in the direction we want to go. Reverse or - * reset the arm. We know we have an I/O in the other - * direction. - */ - if (allow_reverse) { - if (*dir == DIR_RIGHT) { - *dir = DIR_LEFT; - return (best_l); - } else { - *dir = DIR_RIGHT; - return (best_r); - } - } - /* - * Reset (beginning of queue). - */ - RF_ASSERT(*dir == DIR_RIGHT); - return (queue->queue); -} - -void * -rf_SstfCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; -{ - RF_Sstf_t *sstfq; - - RF_CallocAndAdd(sstfq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); - sstfq->dir = DIR_EITHER; - sstfq->allow_reverse = 1; - return ((void *) sstfq); -} - -void * -rf_ScanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; -{ - RF_Sstf_t *scanq; - - RF_CallocAndAdd(scanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); - scanq->dir = DIR_RIGHT; - scanq->allow_reverse = 1; - return ((void *) scanq); -} - -void * -rf_CscanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; -{ - RF_Sstf_t *cscanq; - - RF_CallocAndAdd(cscanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); - cscanq->dir = DIR_RIGHT; - return ((void *) cscanq); -} - -void -rf_SstfEnqueue(qptr, req, priority) - void *qptr; - RF_DiskQueueData_t *req; - int priority; -{ - RF_Sstf_t *sstfq; - - sstfq = (RF_Sstf_t *) qptr; - - if (priority == RF_IO_LOW_PRIORITY) { - if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - printf("raid%d: ENQ lopri %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, - dq->row, dq->col, - sstfq->left.qlen, sstfq->right.qlen, - sstfq->lopri.qlen); - } - do_sstf_ord_q(&sstfq->lopri.queue, &sstfq->lopri.qtail, req); - sstfq->lopri.qlen++; - } else { - if (req->sectorOffset < sstfq->last_sector) { - do_sstf_ord_q(&sstfq->left.queue, &sstfq->left.qtail, req); - sstfq->left.qlen++; - } else { - do_sstf_ord_q(&sstfq->right.queue, &sstfq->right.qtail, req); - sstfq->right.qlen++; - } - } -} - -static void -do_dequeue(queue, req) - RF_SstfQ_t *queue; - RF_DiskQueueData_t *req; -{ - RF_DiskQueueData_t *req2; - - if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: do_dequeue\n", req->raidPtr->raidid); - } - if (req == queue->queue) { - DO_HEAD_DEQ(req2, queue); - RF_ASSERT(req2 == req); - } else - if (req == queue->qtail) { - DO_TAIL_DEQ(req2, queue); - RF_ASSERT(req2 == req); - } else { - /* dequeue from middle of list */ - RF_ASSERT(req->next); - RF_ASSERT(req->prev); - queue->qlen--; - req->next->prev = req->prev; - req->prev->next = req->next; - req->next = req->prev = NULL; - } -} - -RF_DiskQueueData_t * -rf_SstfDequeue(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req = NULL; - RF_Sstf_t *sstfq; - - sstfq = (RF_Sstf_t *) qptr; - - if (rf_sstfDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - RF_ASSERT(QSUM(sstfq) == dq->queueLength); - printf("raid%d: sstf: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, - sstfq->left.qlen, sstfq->right.qlen, sstfq->lopri.qlen); - } - if (sstfq->left.queue == NULL) { - RF_ASSERT(sstfq->left.qlen == 0); - if (sstfq->right.queue == NULL) { - RF_ASSERT(sstfq->right.qlen == 0); - if (sstfq->lopri.queue == NULL) { - RF_ASSERT(sstfq->lopri.qlen == 0); - return (NULL); - } - if (rf_sstfDebug) { - printf("raid%d: sstf: check for close lopri", - req->raidPtr->raidid); - } - req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, - &sstfq->dir, sstfq->allow_reverse); - if (rf_sstfDebug) { - printf("raid%d: sstf: closest_to_arm said %lx", - req->raidPtr->raidid, (long) req); - } - if (req == NULL) - return (NULL); - do_dequeue(&sstfq->lopri, req); - } else { - DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->right); - } - } else { - if (sstfq->right.queue == NULL) { - RF_ASSERT(sstfq->right.qlen == 0); - DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->left); - } else { - if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) - < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { - DO_HEAD_DEQ(req, &sstfq->right); - } else { - DO_TAIL_DEQ(req, &sstfq->left); - } - } - } - RF_ASSERT(req); - sstfq->last_sector = req->sectorOffset; - return (req); -} - -RF_DiskQueueData_t * -rf_ScanDequeue(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req = NULL; - RF_Sstf_t *scanq; - - scanq = (RF_Sstf_t *) qptr; - - if (rf_scanDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - RF_ASSERT(QSUM(scanq) == dq->queueLength); - printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, - scanq->left.qlen, scanq->right.qlen, scanq->lopri.qlen); - } - if (scanq->left.queue == NULL) { - RF_ASSERT(scanq->left.qlen == 0); - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - if (scanq->lopri.queue == NULL) { - RF_ASSERT(scanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &scanq->dir, scanq->allow_reverse); - if (req == NULL) - return (NULL); - do_dequeue(&scanq->lopri, req); - } else { - scanq->dir = DIR_RIGHT; - DO_HEAD_DEQ(req, &scanq->right); - } - } else - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - RF_ASSERT(scanq->left.queue); - scanq->dir = DIR_LEFT; - DO_TAIL_DEQ(req, &scanq->left); - } else { - RF_ASSERT(scanq->right.queue); - RF_ASSERT(scanq->left.queue); - if (scanq->dir == DIR_RIGHT) { - DO_HEAD_DEQ(req, &scanq->right); - } else { - DO_TAIL_DEQ(req, &scanq->left); - } - } - RF_ASSERT(req); - scanq->last_sector = req->sectorOffset; - return (req); -} - -RF_DiskQueueData_t * -rf_CscanDequeue(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req = NULL; - RF_Sstf_t *cscanq; - - cscanq = (RF_Sstf_t *) qptr; - - RF_ASSERT(cscanq->dir == DIR_RIGHT); - if (rf_cscanDebug) { - RF_DiskQueue_t *dq; - dq = (RF_DiskQueue_t *) req->queue; - RF_ASSERT(QSUM(cscanq) == dq->queueLength); - printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, - cscanq->left.qlen, cscanq->right.qlen, - cscanq->lopri.qlen); - } - if (cscanq->right.queue) { - DO_HEAD_DEQ(req, &cscanq->right); - } else { - RF_ASSERT(cscanq->right.qlen == 0); - if (cscanq->left.queue == NULL) { - RF_ASSERT(cscanq->left.qlen == 0); - if (cscanq->lopri.queue == NULL) { - RF_ASSERT(cscanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); - if (req == NULL) - return (NULL); - do_dequeue(&cscanq->lopri, req); - } else { - /* - * There's I/Os to the left of the arm. Swing - * on back (swap queues). - */ - cscanq->right = cscanq->left; - cscanq->left.qlen = 0; - cscanq->left.queue = cscanq->left.qtail = NULL; - DO_HEAD_DEQ(req, &cscanq->right); - } - } - RF_ASSERT(req); - cscanq->last_sector = req->sectorOffset; - return (req); -} - -RF_DiskQueueData_t * -rf_SstfPeek(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req; - RF_Sstf_t *sstfq; - - sstfq = (RF_Sstf_t *) qptr; - - if ((sstfq->left.queue == NULL) && (sstfq->right.queue == NULL)) { - req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, &sstfq->dir, - sstfq->allow_reverse); - } else { - if (sstfq->left.queue == NULL) - req = sstfq->right.queue; - else { - if (sstfq->right.queue == NULL) - req = sstfq->left.queue; - else { - if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) - < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { - req = sstfq->right.queue; - } else { - req = sstfq->left.qtail; - } - } - } - } - if (req == NULL) { - RF_ASSERT(QSUM(sstfq) == 0); - } - return (req); -} - -RF_DiskQueueData_t * -rf_ScanPeek(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req; - RF_Sstf_t *scanq; - int dir; - - scanq = (RF_Sstf_t *) qptr; - dir = scanq->dir; - - if (scanq->left.queue == NULL) { - RF_ASSERT(scanq->left.qlen == 0); - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - if (scanq->lopri.queue == NULL) { - RF_ASSERT(scanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &dir, scanq->allow_reverse); - } else { - req = scanq->right.queue; - } - } else - if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - RF_ASSERT(scanq->left.queue); - req = scanq->left.qtail; - } else { - RF_ASSERT(scanq->right.queue); - RF_ASSERT(scanq->left.queue); - if (scanq->dir == DIR_RIGHT) { - req = scanq->right.queue; - } else { - req = scanq->left.qtail; - } - } - if (req == NULL) { - RF_ASSERT(QSUM(scanq) == 0); - } - return (req); -} - -RF_DiskQueueData_t * -rf_CscanPeek(qptr) - void *qptr; -{ - RF_DiskQueueData_t *req; - RF_Sstf_t *cscanq; - - cscanq = (RF_Sstf_t *) qptr; - - RF_ASSERT(cscanq->dir == DIR_RIGHT); - if (cscanq->right.queue) { - req = cscanq->right.queue; - } else { - RF_ASSERT(cscanq->right.qlen == 0); - if (cscanq->left.queue == NULL) { - RF_ASSERT(cscanq->left.qlen == 0); - if (cscanq->lopri.queue == NULL) { - RF_ASSERT(cscanq->lopri.qlen == 0); - return (NULL); - } - req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); - } else { - /* - * There's I/Os to the left of the arm. We'll end - * up swinging on back. - */ - req = cscanq->left.queue; - } - } - if (req == NULL) { - RF_ASSERT(QSUM(cscanq) == 0); - } - return (req); -} - -int -rf_SstfPromote(qptr, parityStripeID, which_ru) - void *qptr; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; -{ - RF_DiskQueueData_t *r, *next; - RF_Sstf_t *sstfq; - int n; - - sstfq = (RF_Sstf_t *) qptr; - - n = 0; - for (r = sstfq->lopri.queue; r; r = next) { - next = r->next; - if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: check promote %lx\n", - r->raidPtr->raidid, (long) r); - } - if ((r->parityStripeID == parityStripeID) - && (r->which_ru == which_ru)) { - do_dequeue(&sstfq->lopri, r); - rf_SstfEnqueue(qptr, r, RF_IO_NORMAL_PRIORITY); - n++; - } - } - if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: promoted %d matching I/Os queues are %d,%d,%d\n", - r->raidPtr->raidid, n, sstfq->left.qlen, - sstfq->right.qlen, sstfq->lopri.qlen); - } - return (n); -} diff --git a/sys/dev/raidframe/rf_sstf.h b/sys/dev/raidframe/rf_sstf.h deleted file mode 100644 index 2fc1c0d..0000000 --- a/sys/dev/raidframe/rf_sstf.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_sstf.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_SSTF_H_ -#define _RF__RF_SSTF_H_ - -#include <dev/raidframe/rf_diskqueue.h> - -typedef struct RF_SstfQ_s { - RF_DiskQueueData_t *queue; - RF_DiskQueueData_t *qtail; - int qlen; -} RF_SstfQ_t; - -typedef struct RF_Sstf_s { - RF_SstfQ_t left; - RF_SstfQ_t right; - RF_SstfQ_t lopri; - RF_SectorNum_t last_sector; - int dir; - int allow_reverse; -} RF_Sstf_t; - -void * -rf_SstfCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void * -rf_ScanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void * -rf_CscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void rf_SstfEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); -RF_DiskQueueData_t *rf_SstfDequeue(void *qptr); -RF_DiskQueueData_t *rf_SstfPeek(void *qptr); -int -rf_SstfPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); -RF_DiskQueueData_t *rf_ScanDequeue(void *qptr); -RF_DiskQueueData_t *rf_ScanPeek(void *qptr); -RF_DiskQueueData_t *rf_CscanDequeue(void *qptr); -RF_DiskQueueData_t *rf_CscanPeek(void *qptr); - -#endif /* !_RF__RF_SSTF_H_ */ diff --git a/sys/dev/raidframe/rf_states.c b/sys/dev/raidframe/rf_states.c deleted file mode 100644 index bc686ec..0000000 --- a/sys/dev/raidframe/rf_states.c +++ /dev/null @@ -1,669 +0,0 @@ -/* $NetBSD: rf_states.c,v 1.15 2000/10/20 02:24:45 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II, Robby Findler - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <sys/errno.h> - -#include <dev/raidframe/rf_archs.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_dag.h> -#include <dev/raidframe/rf_desc.h> -#include <dev/raidframe/rf_aselect.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_states.h> -#include <dev/raidframe/rf_dagutils.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_engine.h> -#include <dev/raidframe/rf_map.h> -#include <dev/raidframe/rf_etimer.h> -#include <dev/raidframe/rf_kintf.h> - -/* prototypes for some of the available states. - - States must: - - - not block. - - - either schedule rf_ContinueRaidAccess as a callback and return - RF_TRUE, or complete all of their work and return RF_FALSE. - - - increment desc->state when they have finished their work. -*/ - -static char * -StateName(RF_AccessState_t state) -{ - switch (state) { - case rf_QuiesceState:return "QuiesceState"; - case rf_MapState: - return "MapState"; - case rf_LockState: - return "LockState"; - case rf_CreateDAGState: - return "CreateDAGState"; - case rf_ExecuteDAGState: - return "ExecuteDAGState"; - case rf_ProcessDAGState: - return "ProcessDAGState"; - case rf_CleanupState: - return "CleanupState"; - case rf_LastState: - return "LastState"; - case rf_IncrAccessesCountState: - return "IncrAccessesCountState"; - case rf_DecrAccessesCountState: - return "DecrAccessesCountState"; - default: - return "!!! UnnamedState !!!"; - } -} - -void -rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc) -{ - int suspended = RF_FALSE; - int current_state_index = desc->state; - RF_AccessState_t current_state = desc->states[current_state_index]; - int unit = desc->raidPtr->raidid; - - do { - - current_state_index = desc->state; - current_state = desc->states[current_state_index]; - - switch (current_state) { - - case rf_QuiesceState: - suspended = rf_State_Quiesce(desc); - break; - case rf_IncrAccessesCountState: - suspended = rf_State_IncrAccessCount(desc); - break; - case rf_MapState: - suspended = rf_State_Map(desc); - break; - case rf_LockState: - suspended = rf_State_Lock(desc); - break; - case rf_CreateDAGState: - suspended = rf_State_CreateDAG(desc); - break; - case rf_ExecuteDAGState: - suspended = rf_State_ExecuteDAG(desc); - break; - case rf_ProcessDAGState: - suspended = rf_State_ProcessDAG(desc); - break; - case rf_CleanupState: - suspended = rf_State_Cleanup(desc); - break; - case rf_DecrAccessesCountState: - suspended = rf_State_DecrAccessCount(desc); - break; - case rf_LastState: - suspended = rf_State_LastState(desc); - break; - } - - /* after this point, we cannot dereference desc since desc may - * have been freed. desc is only freed in LastState, so if we - * renter this function or loop back up, desc should be valid. */ - - if (rf_printStatesDebug) { - printf("raid%d: State: %-24s StateIndex: %3i desc: 0x%ld %s\n", - unit, StateName(current_state), - current_state_index, (long) desc, - suspended ? "callback scheduled" : "looping"); - } - } while (!suspended && current_state != rf_LastState); - - return; -} - - -void -rf_ContinueDagAccess(RF_DagList_t * dagList) -{ - RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec); - RF_RaidAccessDesc_t *desc; - RF_DagHeader_t *dag_h; - RF_Etimer_t timer; - int i; - - desc = dagList->desc; - - timer = tracerec->timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer); - RF_ETIMER_START(tracerec->timer); - - /* skip to dag which just finished */ - dag_h = dagList->dags; - for (i = 0; i < dagList->numDagsDone; i++) { - dag_h = dag_h->next; - } - - /* check to see if retry is required */ - if (dag_h->status == rf_rollBackward) { - /* when a dag fails, mark desc status as bad and allow all - * other dags in the desc to execute to completion. then, - * free all dags and start over */ - desc->status = 1; /* bad status */ - { - printf("raid%d: DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n", - desc->raidPtr->raidid, desc->type, - (long) desc->raidAddress, - (long) desc->raidAddress, (int) desc->numBlocks, - (int) desc->numBlocks, - (unsigned long) (desc->bufPtr)); - } - } - dagList->numDagsDone++; - rf_ContinueRaidAccess(desc); -} - -int -rf_State_LastState(RF_RaidAccessDesc_t * desc) -{ - void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc; - RF_CBParam_t callbackArg; - - callbackArg.p = desc->callbackArg; - - /* - * If this is not an async request, wake up the caller - */ - if (desc->async_flag == 0) - wakeup(desc->bp); - - /* - * That's all the IO for this one... unbusy the 'disk'. - */ - - rf_disk_unbusy(desc); - - /* - * Wakeup any requests waiting to go. - */ - - RF_LOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex); - ((RF_Raid_t *) desc->raidPtr)->openings++; - RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex); - - /* wake up any pending IO */ - raidstart(((RF_Raid_t *) desc->raidPtr)); - - /* printf("Calling biodone on 0x%x\n",desc->bp); */ - biodone(desc->bp); /* access came through ioctl */ - - if (callbackFunc) - callbackFunc(callbackArg); - rf_FreeRaidAccDesc(desc); - - return RF_FALSE; -} - -int -rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr; - - raidPtr = desc->raidPtr; - /* Bummer. We have to do this to be 100% safe w.r.t. the increment - * below */ - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accs_in_flight++; /* used to detect quiescence */ - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - desc->state++; - return RF_FALSE; -} - -int -rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr; - - raidPtr = desc->raidPtr; - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accs_in_flight--; - if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) { - rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc); - } - rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks); - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - desc->state++; - return RF_FALSE; -} - -int -rf_State_Quiesce(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - int suspended = RF_FALSE; - RF_Raid_t *raidPtr; - - raidPtr = desc->raidPtr; - - RF_ETIMER_START(timer); - RF_ETIMER_START(desc->timer); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - if (raidPtr->accesses_suspended) { - RF_CallbackDesc_t *cb; - cb = rf_AllocCallbackDesc(); - /* XXX the following cast is quite bogus... - * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *) as an - * argument.. GO */ - cb->callbackFunc = (void (*) (RF_CBParam_t)) rf_ContinueRaidAccess; - cb->callbackArg.p = (void *) desc; - cb->next = raidPtr->quiesce_wait_list; - raidPtr->quiesce_wait_list = cb; - suspended = RF_TRUE; - } - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer); - - if (suspended && rf_quiesceDebug) - printf("Stalling access due to quiescence lock\n"); - - desc->state++; - return suspended; -} - -int -rf_State_Map(RF_RaidAccessDesc_t * desc) -{ - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - - if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks, - desc->bufPtr, RF_DONT_REMAP))) - RF_PANIC(); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer); - - desc->state++; - return RF_FALSE; -} - -int -rf_State_Lock(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_AccessStripeMap_t *asm_p; - RF_Etimer_t timer; - int suspended = RF_FALSE; - - RF_ETIMER_START(timer); - if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { - RF_StripeNum_t lastStripeID = -1; - - /* acquire each lock that we don't already hold */ - for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { - RF_ASSERT(RF_IO_IS_R_OR_W(desc->type)); - if (!rf_suppressLocksAndLargeWrites && - asm_p->parityInfo && - !(desc->flags & RF_DAG_SUPPRESS_LOCKS) && - !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) { - asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED; - RF_ASSERT(asm_p->stripeID > lastStripeID); - - /* locks must be acquired hierarchically */ - - lastStripeID = asm_p->stripeID; - /* XXX the cast to (void (*)(RF_CBParam_t)) - * below is bogus! GO */ - RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, - desc->type, - (void (*) (RF_Buf_t)) rf_ContinueRaidAccess, - desc, asm_p, - raidPtr->Layout.dataSectorsPerStripe); - if (rf_AcquireStripeLock(raidPtr->lockTable, - asm_p->stripeID, &asm_p->lockReqDesc)) { - suspended = RF_TRUE; - break; - } - } - if (desc->type == RF_IO_TYPE_WRITE && - raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing) { - if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) { - int val; - - asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED; - /* XXX the cast below is quite - * bogus!!! XXX GO */ - val = rf_ForceOrBlockRecon(raidPtr, asm_p, - (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess, desc); - if (val == 0) { - asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED; - } else { - suspended = RF_TRUE; - break; - } - } else { - if (rf_pssDebug) { - printf("raid%d: skipping force/block because already done, psid %ld\n", - desc->raidPtr->raidid, - (long) asm_p->stripeID); - } - } - } else { - if (rf_pssDebug) { - printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n", - desc->raidPtr->raidid, - (long) asm_p->stripeID); - } - } - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); - - if (suspended) - return (RF_TRUE); - } - desc->state++; - return (RF_FALSE); -} -/* - * the following three states create, execute, and post-process dags - * the error recovery unit is a single dag. - * by default, SelectAlgorithm creates an array of dags, one per parity stripe - * in some tricky cases, multiple dags per stripe are created - * - dags within a parity stripe are executed sequentially (arbitrary order) - * - dags for distinct parity stripes are executed concurrently - * - * repeat until all dags complete successfully -or- dag selection fails - * - * while !done - * create dag(s) (SelectAlgorithm) - * if dag - * execute dag (DispatchDAG) - * if dag successful - * done (SUCCESS) - * else - * !done (RETRY - start over with new dags) - * else - * done (FAIL) - */ -int -rf_State_CreateDAG(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - RF_DagHeader_t *dag_h; - int i, selectStatus; - - /* generate a dag for the access, and fire it off. When the dag - * completes, we'll get re-invoked in the next state. */ - RF_ETIMER_START(timer); - /* SelectAlgorithm returns one or more dags */ - selectStatus = rf_SelectAlgorithm(desc, desc->flags | RF_DAG_SUPPRESS_LOCKS); - if (rf_printDAGsDebug) - for (i = 0; i < desc->numStripes; i++) - rf_PrintDAGList(desc->dagArray[i].dags); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - /* update time to create all dags */ - tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer); - - desc->status = 0; /* good status */ - - if (selectStatus) { - /* failed to create a dag */ - /* this happens when there are too many faults or incomplete - * dag libraries */ - printf("[Failed to create a DAG]\n"); - RF_PANIC(); - } else { - /* bind dags to desc */ - for (i = 0; i < desc->numStripes; i++) { - dag_h = desc->dagArray[i].dags; - while (dag_h) { - dag_h->bp = (RF_Buf_t) desc->bp; - dag_h->tracerec = tracerec; - dag_h = dag_h->next; - } - } - desc->flags |= RF_DAG_DISPATCH_RETURNED; - desc->state++; /* next state should be rf_State_ExecuteDAG */ - } - return RF_FALSE; -} - - - -/* the access has an array of dagLists, one dagList per parity stripe. - * fire the first dag in each parity stripe (dagList). - * dags within a stripe (dagList) must be executed sequentially - * - this preserves atomic parity update - * dags for independents parity groups (stripes) are fired concurrently */ - -int -rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc) -{ - int i; - RF_DagHeader_t *dag_h; - RF_DagList_t *dagArray = desc->dagArray; - - /* next state is always rf_State_ProcessDAG important to do this - * before firing the first dag (it may finish before we leave this - * routine) */ - desc->state++; - - /* sweep dag array, a stripe at a time, firing the first dag in each - * stripe */ - for (i = 0; i < desc->numStripes; i++) { - RF_ASSERT(dagArray[i].numDags > 0); - RF_ASSERT(dagArray[i].numDagsDone == 0); - RF_ASSERT(dagArray[i].numDagsFired == 0); - RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire first dag in this stripe */ - dag_h = dagArray[i].dags; - RF_ASSERT(dag_h); - dagArray[i].numDagsFired++; - /* XXX Yet another case where we pass in a conflicting - * function pointer :-( XXX GO */ - rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, &dagArray[i]); - } - - /* the DAG will always call the callback, even if there was no - * blocking, so we are always suspended in this state */ - return RF_TRUE; -} - - - -/* rf_State_ProcessDAG is entered when a dag completes. - * first, check to all dags in the access have completed - * if not, fire as many dags as possible */ - -int -rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc) -{ - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_DagHeader_t *dag_h; - int i, j, done = RF_TRUE; - RF_DagList_t *dagArray = desc->dagArray; - RF_Etimer_t timer; - - /* check to see if this is the last dag */ - for (i = 0; i < desc->numStripes; i++) - if (dagArray[i].numDags != dagArray[i].numDagsDone) - done = RF_FALSE; - - if (done) { - if (desc->status) { - /* a dag failed, retry */ - RF_ETIMER_START(timer); - /* free all dags */ - for (i = 0; i < desc->numStripes; i++) { - rf_FreeDAG(desc->dagArray[i].dags); - } - rf_MarkFailuresInASMList(raidPtr, asmh); - /* back up to rf_State_CreateDAG */ - desc->state = desc->state - 2; - return RF_FALSE; - } else { - /* move on to rf_State_Cleanup */ - desc->state++; - } - return RF_FALSE; - } else { - /* more dags to execute */ - /* see if any are ready to be fired. if so, fire them */ - /* don't fire the initial dag in a list, it's fired in - * rf_State_ExecuteDAG */ - for (i = 0; i < desc->numStripes; i++) { - if ((dagArray[i].numDagsDone < dagArray[i].numDags) - && (dagArray[i].numDagsDone == dagArray[i].numDagsFired) - && (dagArray[i].numDagsFired > 0)) { - RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire next dag in this stripe */ - /* first, skip to next dag awaiting execution */ - dag_h = dagArray[i].dags; - for (j = 0; j < dagArray[i].numDagsDone; j++) - dag_h = dag_h->next; - dagArray[i].numDagsFired++; - /* XXX and again we pass a different function - * pointer.. GO */ - rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, - &dagArray[i]); - } - } - return RF_TRUE; - } -} -/* only make it this far if all dags complete successfully */ -int -rf_State_Cleanup(RF_RaidAccessDesc_t * desc) -{ - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccessStripeMap_t *asm_p; - RF_DagHeader_t *dag_h; - RF_Etimer_t timer; - int i; - - desc->state++; - - timer = tracerec->timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer); - - /* the RAID I/O is complete. Clean up. */ - tracerec->specific.user.dag_retry_us = 0; - - RF_ETIMER_START(timer); - if (desc->flags & RF_DAG_RETURN_DAG) { - /* copy dags into paramDAG */ - *(desc->paramDAG) = desc->dagArray[0].dags; - dag_h = *(desc->paramDAG); - for (i = 1; i < desc->numStripes; i++) { - /* concatenate dags from remaining stripes */ - RF_ASSERT(dag_h); - while (dag_h->next) - dag_h = dag_h->next; - dag_h->next = desc->dagArray[i].dags; - } - } else { - /* free all dags */ - for (i = 0; i < desc->numStripes; i++) { - rf_FreeDAG(desc->dagArray[i].dags); - } - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer); - - RF_ETIMER_START(timer); - if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { - for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { - if (!rf_suppressLocksAndLargeWrites && - asm_p->parityInfo && - !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) { - RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc); - rf_ReleaseStripeLock(raidPtr->lockTable, - asm_p->stripeID, - &asm_p->lockReqDesc); - } - if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) { - rf_UnblockRecon(raidPtr, asm_p); - } - } - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); - - RF_ETIMER_START(timer); - if (desc->flags & RF_DAG_RETURN_ASM) - *(desc->paramASM) = asmh; - else - rf_FreeAccessStripeMap(asmh); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer); - - RF_ETIMER_STOP(desc->timer); - RF_ETIMER_EVAL(desc->timer); - - timer = desc->tracerec.tot_timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - desc->tracerec.total_us = RF_ETIMER_VAL_US(timer); - - rf_LogTraceRec(raidPtr, tracerec); - - desc->flags |= RF_DAG_ACCESS_COMPLETE; - - return RF_FALSE; -} diff --git a/sys/dev/raidframe/rf_states.h b/sys/dev/raidframe/rf_states.h deleted file mode 100644 index 6c0aee4..0000000 --- a/sys/dev/raidframe/rf_states.h +++ /dev/null @@ -1,48 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_states.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, William V. Courtright II, Robby Findler - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#ifndef _RF__RF_STATES_H_ -#define _RF__RF_STATES_H_ - -#include <dev/raidframe/rf_types.h> - -void rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc); -void rf_ContinueDagAccess(RF_DagList_t * dagList); -int rf_State_LastState(RF_RaidAccessDesc_t * desc); -int rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc); -int rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc); -int rf_State_Quiesce(RF_RaidAccessDesc_t * desc); -int rf_State_Map(RF_RaidAccessDesc_t * desc); -int rf_State_Lock(RF_RaidAccessDesc_t * desc); -int rf_State_CreateDAG(RF_RaidAccessDesc_t * desc); -int rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc); -int rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc); -int rf_State_Cleanup(RF_RaidAccessDesc_t * desc); - -#endif /* !_RF__RF_STATES_H_ */ diff --git a/sys/dev/raidframe/rf_stripelocks.c b/sys/dev/raidframe/rf_stripelocks.c deleted file mode 100644 index 409c0f4..0000000 --- a/sys/dev/raidframe/rf_stripelocks.c +++ /dev/null @@ -1,669 +0,0 @@ -/* $NetBSD: rf_stripelocks.c,v 1.6 2000/12/04 11:35:46 fvdl Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Mark Holland, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * stripelocks.c -- code to lock stripes for read and write access - * - * The code distinguishes between read locks and write locks. There can be - * as many readers to given stripe as desired. When a write request comes - * in, no further readers are allowed to enter, and all subsequent requests - * are queued in FIFO order. When a the number of readers goes to zero, the - * writer is given the lock. When a writer releases the lock, the list of - * queued requests is scanned, and all readersq up to the next writer are - * given the lock. - * - * The lock table size must be one less than a power of two, but HASH_STRIPEID - * is the only function that requires this. - * - * The code now supports "range locks". When you ask to lock a stripe, you - * specify a range of addresses in that stripe that you want to lock. When - * you acquire the lock, you've locked only this range of addresses, and - * other threads can concurrently read/write any non-overlapping portions - * of the stripe. The "addresses" that you lock are abstract in that you - * can pass in anything you like. The expectation is that you'll pass in - * the range of physical disk offsets of the parity bits you're planning - * to update. The idea behind this, of course, is to allow sub-stripe - * locking. The implementation is perhaps not the best imaginable; in the - * worst case a lock release is O(n^2) in the total number of outstanding - * requests to a given stripe. Note that if you're striping with a - * stripe unit size equal to an entire disk (i.e. not striping), there will - * be only one stripe and you may spend some significant number of cycles - * searching through stripe lock descriptors. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_raid.h> -#include <dev/raidframe/rf_stripelocks.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_freelist.h> -#include <dev/raidframe/rf_debugprint.h> -#include <dev/raidframe/rf_driver.h> -#include <dev/raidframe/rf_shutdown.h> - -#define Dprintf1(s,a) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#define Dprintf6(s,a,b,c,d,e,f) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL) -#define Dprintf7(s,a,b,c,d,e,f,g) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) -#define Dprintf8(s,a,b,c,d,e,f,g,h) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),(void *)((unsigned long)h)) - -#define FLUSH - -#define HASH_STRIPEID(_sid_) ( (_sid_) & (rf_lockTableSize-1) ) - -static void AddToWaitersQueue(RF_LockTableEntry_t * lockTable, RF_StripeLockDesc_t * lockDesc, RF_LockReqDesc_t * lockReqDesc); -static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID); -static void FreeStripeLockDesc(RF_StripeLockDesc_t * p); -static void PrintLockedStripes(RF_LockTableEntry_t * lockTable); - -/* determines if two ranges overlap. always yields false if either start value is negative */ -#define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2) \ - ( (_strt1 >= 0) && (_strt2 >= 0) && (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2)) ) - -/* determines if any of the ranges specified in the two lock descriptors overlap each other */ -#define RANGE_OVERLAP(_cand, _pred) \ - ( SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start, (_pred)->stop ) || \ - SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start, (_pred)->stop ) || \ - SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start2, (_pred)->stop2) || \ - SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start2, (_pred)->stop2) ) - -/* Determines if a candidate lock request conflicts with a predecessor lock req. - * Note that the arguments are not interchangeable. - * The rules are: - * a candidate read conflicts with a predecessor write if any ranges overlap - * a candidate write conflicts with a predecessor read if any ranges overlap - * a candidate write conflicts with a predecessor write if any ranges overlap - */ -#define STRIPELOCK_CONFLICT(_cand, _pred) \ - RANGE_OVERLAP((_cand), (_pred)) && \ - ( ( (((_cand)->type == RF_IO_TYPE_READ) && ((_pred)->type == RF_IO_TYPE_WRITE)) || \ - (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_READ)) || \ - (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_WRITE)) \ - ) \ - ) - -static RF_FreeList_t *rf_stripelock_freelist; -#define RF_MAX_FREE_STRIPELOCK 128 -#define RF_STRIPELOCK_INC 8 -#define RF_STRIPELOCK_INITIAL 32 - -static void rf_ShutdownStripeLockFreeList(void *); -static void rf_RaidShutdownStripeLocks(void *); - -static void -rf_ShutdownStripeLockFreeList(ignored) - void *ignored; -{ - RF_FREELIST_DESTROY(rf_stripelock_freelist, next, (RF_StripeLockDesc_t *)); -} - -int -rf_ConfigureStripeLockFreeList(listp) - RF_ShutdownList_t **listp; -{ - unsigned mask; - int rc; - - RF_FREELIST_CREATE(rf_stripelock_freelist, RF_MAX_FREE_STRIPELOCK, - RF_STRIPELOCK_INITIAL, sizeof(RF_StripeLockDesc_t)); - rc = rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownStripeLockFreeList(NULL); - return (rc); - } - RF_FREELIST_PRIME(rf_stripelock_freelist, RF_STRIPELOCK_INITIAL, next, - (RF_StripeLockDesc_t *)); - for (mask = 0x1; mask; mask <<= 1) - if (rf_lockTableSize == mask) - break; - if (!mask) { - printf("[WARNING: lock table size must be a power of two. Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE); - rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE; - } - return (0); -} - -RF_LockTableEntry_t * -rf_MakeLockTable() -{ - RF_LockTableEntry_t *lockTable; - int i, rc; - - RF_Calloc(lockTable, ((int) rf_lockTableSize), sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *)); - if (lockTable == NULL) - return (NULL); - for (i = 0; i < rf_lockTableSize; i++) { - rc = rf_mutex_init(&lockTable[i].mutex, __FUNCTION__); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - /* XXX clean up other mutexes */ - return (NULL); - } - } - return (lockTable); -} - -void -rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable) -{ - int i; - - if (rf_stripeLockDebug) { - PrintLockedStripes(lockTable); - } - for (i = 0; i < rf_lockTableSize; i++) { - rf_mutex_destroy(&lockTable[i].mutex); - } - RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t)); -} - -static void -rf_RaidShutdownStripeLocks(arg) - void *arg; -{ - RF_Raid_t *raidPtr = (RF_Raid_t *) arg; - rf_ShutdownStripeLocks(raidPtr->lockTable); -} - -int -rf_ConfigureStripeLocks( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) -{ - int rc; - - raidPtr->lockTable = rf_MakeLockTable(); - if (raidPtr->lockTable == NULL) - return (ENOMEM); - rc = rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownStripeLocks(raidPtr->lockTable); - return (rc); - } - return (0); -} -/* returns 0 if you've got the lock, and non-zero if you have to wait. - * if and only if you have to wait, we'll cause cbFunc to get invoked - * with cbArg when you are granted the lock. We store a tag in *releaseTag - * that you need to give back to us when you release the lock. - */ -int -rf_AcquireStripeLock( - RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t * lockReqDesc) -{ - RF_StripeLockDesc_t *lockDesc; - RF_LockReqDesc_t *p; - int tid = 0, hashval = HASH_STRIPEID(stripeID); - int retcode = 0; - - RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); - - if (rf_stripeLockDebug) { - if (stripeID == -1) - Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n", tid); - else { - Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n", - tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start, - lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - Dprintf3("[%d] lock %ld hashval %d\n", tid, stripeID, hashval); - FLUSH; - } - } - if (stripeID == -1) - return (0); - lockReqDesc->next = NULL; /* just to be sure */ - - RF_LOCK_MUTEX(lockTable[hashval].mutex); - for (lockDesc = lockTable[hashval].descList; lockDesc; lockDesc = lockDesc->next) { - if (lockDesc->stripeID == stripeID) - break; - } - - if (!lockDesc) { /* no entry in table => no one reading or - * writing */ - lockDesc = AllocStripeLockDesc(stripeID); - lockDesc->next = lockTable[hashval].descList; - lockTable[hashval].descList = lockDesc; - if (lockReqDesc->type == RF_IO_TYPE_WRITE) - lockDesc->nWriters++; - lockDesc->granted = lockReqDesc; - if (rf_stripeLockDebug) { - Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - FLUSH; - } - } else { - - if (lockReqDesc->type == RF_IO_TYPE_WRITE) - lockDesc->nWriters++; - - if (lockDesc->nWriters == 0) { /* no need to search any lists - * if there are no writers - * anywhere */ - lockReqDesc->next = lockDesc->granted; - lockDesc->granted = lockReqDesc; - if (rf_stripeLockDebug) { - Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - FLUSH; - } - } else { - - /* search the granted & waiting lists for a conflict. - * stop searching as soon as we find one */ - retcode = 0; - for (p = lockDesc->granted; p; p = p->next) - if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { - retcode = 1; - break; - } - if (!retcode) - for (p = lockDesc->waitersH; p; p = p->next) - if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { - retcode = 2; - break; - } - if (!retcode) { - lockReqDesc->next = lockDesc->granted; /* no conflicts found => - * grant lock */ - lockDesc->granted = lockReqDesc; - if (rf_stripeLockDebug) { - Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, - lockReqDesc->start2, lockReqDesc->stop2); - FLUSH; - } - } else { - if (rf_stripeLockDebug) { - Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, - hashval); - Dprintf3("[%d] lock %ld retcode=%d\n", tid, stripeID, retcode); - FLUSH; - } - AddToWaitersQueue(lockTable, lockDesc, lockReqDesc); /* conflict => the - * current access must - * wait */ - } - } - } - - RF_UNLOCK_MUTEX(lockTable[hashval].mutex); - return (retcode); -} - -void -rf_ReleaseStripeLock( - RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t * lockReqDesc) -{ - RF_StripeLockDesc_t *lockDesc, *ld_t; - RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t; - RF_IoType_t type = lockReqDesc->type; - int tid = 0, hashval = HASH_STRIPEID(stripeID); - int release_it, consider_it; - RF_LockReqDesc_t *candidate, *candidate_t, *predecessor; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (rf_stripeLockDebug) { - if (stripeID == -1) - Dprintf1("[%d] Lock release supressed (stripeID == -1)\n", tid); - else { - Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable); - FLUSH; - } - } - if (stripeID == -1) - return; - - RF_LOCK_MUTEX(lockTable[hashval].mutex); - - /* find the stripe lock descriptor */ - for (ld_t = NULL, lockDesc = lockTable[hashval].descList; lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) { - if (lockDesc->stripeID == stripeID) - break; - } - RF_ASSERT(lockDesc); /* major error to release a lock that doesn't - * exist */ - - /* find the stripe lock request descriptor & delete it from the list */ - for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next) - if (lr == lockReqDesc) - break; - - RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a - * lock that hasn't been - * granted */ - if (lr_t) - lr_t->next = lr->next; - else { - RF_ASSERT(lr == lockDesc->granted); - lockDesc->granted = lr->next; - } - lr->next = NULL; - - if (lockReqDesc->type == RF_IO_TYPE_WRITE) - lockDesc->nWriters--; - - /* search through the waiters list to see if anyone needs to be woken - * up. for each such descriptor in the wait list, we check it against - * everything granted and against everything _in front_ of it in the - * waiters queue. If it conflicts with none of these, we release it. - * - * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST HERE. - * This will roach the case where the callback tries to acquire a new - * lock in the same stripe. There are some asserts to try and detect - * this. - * - * We apply 2 performance optimizations: (1) if releasing this lock - * results in no more writers to this stripe, we just release - * everybody waiting, since we place no restrictions on the number of - * concurrent reads. (2) we consider as candidates for wakeup only - * those waiters that have a range overlap with either the descriptor - * being woken up or with something in the callbacklist (i.e. - * something we've just now woken up). This allows us to avoid the - * long evaluation for some descriptors. */ - - callbacklist = NULL; - if (lockDesc->nWriters == 0) { /* performance tweak (1) */ - while (lockDesc->waitersH) { - - lr = lockDesc->waitersH; /* delete from waiters - * list */ - lockDesc->waitersH = lr->next; - - RF_ASSERT(lr->type == RF_IO_TYPE_READ); - - lr->next = lockDesc->granted; /* add to granted list */ - lockDesc->granted = lr; - - RF_ASSERT(!lr->templink); - lr->templink = callbacklist; /* put on callback list - * so that we'll invoke - * callback below */ - callbacklist = lr; - if (rf_stripeLockDebug) { - Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable); - FLUSH; - } - } - lockDesc->waitersT = NULL; /* we've purged the whole - * waiters list */ - - } else - for (candidate_t = NULL, candidate = lockDesc->waitersH; candidate;) { - - /* performance tweak (2) */ - consider_it = 0; - if (RANGE_OVERLAP(lockReqDesc, candidate)) - consider_it = 1; - else - for (t = callbacklist; t; t = t->templink) - if (RANGE_OVERLAP(t, candidate)) { - consider_it = 1; - break; - } - if (!consider_it) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - candidate_t = candidate; - candidate = candidate->next; - continue; - } - /* we have a candidate for release. check to make - * sure it is not blocked by any granted locks */ - release_it = 1; - for (predecessor = lockDesc->granted; predecessor; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - release_it = 0; - break; - } - } - - /* now check to see if the candidate is blocked by any - * waiters that occur before it it the wait queue */ - if (release_it) - for (predecessor = lockDesc->waitersH; predecessor != candidate; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - release_it = 0; - break; - } - } - - /* release it if indicated */ - if (release_it) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); - FLUSH; - } - if (candidate_t) { - candidate_t->next = candidate->next; - if (lockDesc->waitersT == candidate) - lockDesc->waitersT = candidate_t; /* cannot be waitersH - * since candidate_t is - * not NULL */ - } else { - RF_ASSERT(candidate == lockDesc->waitersH); - lockDesc->waitersH = lockDesc->waitersH->next; - if (!lockDesc->waitersH) - lockDesc->waitersT = NULL; - } - candidate->next = lockDesc->granted; /* move it to the - * granted list */ - lockDesc->granted = candidate; - - RF_ASSERT(!candidate->templink); - candidate->templink = callbacklist; /* put it on the list of - * things to be called - * after we release the - * mutex */ - callbacklist = candidate; - - if (!candidate_t) - candidate = lockDesc->waitersH; - else - candidate = candidate_t->next; /* continue with the - * rest of the list */ - } else { - candidate_t = candidate; - candidate = candidate->next; /* continue with the - * rest of the list */ - } - } - - /* delete the descriptor if no one is waiting or active */ - if (!lockDesc->granted && !lockDesc->waitersH) { - RF_ASSERT(lockDesc->nWriters == 0); - if (rf_stripeLockDebug) { - Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n", tid, (unsigned long) lockTable, stripeID); - FLUSH; - } - if (ld_t) - ld_t->next = lockDesc->next; - else { - RF_ASSERT(lockDesc == lockTable[hashval].descList); - lockTable[hashval].descList = lockDesc->next; - } - FreeStripeLockDesc(lockDesc); - lockDesc = NULL;/* only for the ASSERT below */ - } - RF_UNLOCK_MUTEX(lockTable[hashval].mutex); - - /* now that we've unlocked the mutex, invoke the callback on all the - * descriptors in the list */ - RF_ASSERT(!((callbacklist) && (!lockDesc))); /* if we deleted the - * descriptor, we should - * have no callbacks to - * do */ - for (candidate = callbacklist; candidate;) { - t = candidate; - candidate = candidate->templink; - t->templink = NULL; - (t->cbFunc) (t->cbArg); - } -} -/* must have the indicated lock table mutex upon entry */ -static void -AddToWaitersQueue( - RF_LockTableEntry_t * lockTable, - RF_StripeLockDesc_t * lockDesc, - RF_LockReqDesc_t * lockReqDesc) -{ -#if 0 /* XXX fvdl -- unitialized use of 'tid' */ - int tid; - - if (rf_stripeLockDebug) { - Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx\n", tid, lockDesc->stripeID, (unsigned long) lockTable); - FLUSH; - } -#endif - if (!lockDesc->waitersH) { - lockDesc->waitersH = lockDesc->waitersT = lockReqDesc; - } else { - lockDesc->waitersT->next = lockReqDesc; - lockDesc->waitersT = lockReqDesc; - } -} - -static RF_StripeLockDesc_t * -AllocStripeLockDesc(RF_StripeNum_t stripeID) -{ - RF_StripeLockDesc_t *p; - - RF_FREELIST_GET(rf_stripelock_freelist, p, next, (RF_StripeLockDesc_t *)); - if (p) { - p->stripeID = stripeID; - } - return (p); -} - -static void -FreeStripeLockDesc(RF_StripeLockDesc_t * p) -{ - RF_FREELIST_FREE(rf_stripelock_freelist, p, next); -} - -static void -PrintLockedStripes(lockTable) - RF_LockTableEntry_t *lockTable; -{ - int i, j, foundone = 0, did; - RF_StripeLockDesc_t *p; - RF_LockReqDesc_t *q; - - RF_LOCK_MUTEX(rf_printf_mutex); - printf("Locked stripes:\n"); - for (i = 0; i < rf_lockTableSize; i++) - if (lockTable[i].descList) { - foundone = 1; - for (p = lockTable[i].descList; p; p = p->next) { - printf("Stripe ID 0x%lx (%d) nWriters %d\n", - (long) p->stripeID, (int) p->stripeID, p->nWriters); - - if (!(p->granted)) - printf("Granted: (none)\n"); - else - printf("Granted:\n"); - for (did = 1, j = 0, q = p->granted; q; j++, q = q->next) { - printf(" %c(%ld-%ld", q->type, (long) q->start, (long) q->stop); - if (q->start2 != -1) - printf(",%ld-%ld) ", (long) q->start2, - (long) q->stop2); - else - printf(") "); - if (j && !(j % 4)) { - printf("\n"); - did = 1; - } else - did = 0; - } - if (!did) - printf("\n"); - - if (!(p->waitersH)) - printf("Waiting: (none)\n"); - else - printf("Waiting:\n"); - for (did = 1, j = 0, q = p->waitersH; q; j++, q = q->next) { - printf("%c(%ld-%ld", q->type, (long) q->start, (long) q->stop); - if (q->start2 != -1) - printf(",%ld-%ld) ", (long) q->start2, (long) q->stop2); - else - printf(") "); - if (j && !(j % 4)) { - printf("\n "); - did = 1; - } else - did = 0; - } - if (!did) - printf("\n"); - } - } - if (!foundone) - printf("(none)\n"); - else - printf("\n"); - RF_UNLOCK_MUTEX(rf_printf_mutex); -} diff --git a/sys/dev/raidframe/rf_stripelocks.h b/sys/dev/raidframe/rf_stripelocks.h deleted file mode 100644 index ab960c1..0000000 --- a/sys/dev/raidframe/rf_stripelocks.h +++ /dev/null @@ -1,130 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_stripelocks.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/***************************************************************************** - * - * stripelocks.h -- header file for locking stripes - * - * Note that these functions are called from the execution routines of certain - * DAG Nodes, and so they must be NON-BLOCKING to assure maximum parallelism - * in the DAG. Accordingly, when a node wants to acquire a lock, it calls - * AcquireStripeLock, supplying a pointer to a callback function. If the lock - * is free at the time of the call, 0 is returned, indicating that the lock - * has been acquired. If the lock is not free, 1 is returned, and a copy of - * the function pointer and argument are held in the lock table. When the - * lock becomes free, the callback function is invoked. - * - *****************************************************************************/ - -#ifndef _RF__RF_STRIPELOCKS_H_ -#define _RF__RF_STRIPELOCKS_H_ - -#if defined(__FreeBSD__) -#include <sys/types.h> -#if __FreeBSD_version > 500005 -#include <sys/bio.h> -#endif -#if _KERNEL -#include <sys/systm.h> -#endif -#endif -#include <sys/buf.h> - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_general.h> - -struct RF_LockReqDesc_s { - RF_IoType_t type; /* read or write */ - RF_int64 start, stop; /* start and end of range to be locked */ - RF_int64 start2, stop2; /* start and end of 2nd range to be locked */ - void (*cbFunc) (RF_Buf_t); /* callback function */ - void *cbArg; /* argument to callback function */ - RF_LockReqDesc_t *next; /* next element in chain */ - RF_LockReqDesc_t *templink; /* for making short-lived lists of - * request descriptors */ -}; -#define RF_ASSERT_VALID_LOCKREQ(_lr_) { \ - RF_ASSERT(RF_IO_IS_R_OR_W((_lr_)->type)); \ -} - -struct RF_StripeLockDesc_s { - RF_StripeNum_t stripeID;/* the stripe ID */ - RF_LockReqDesc_t *granted; /* unordered list of granted requests */ - RF_LockReqDesc_t *waitersH; /* FIFO queue of all waiting reqs, - * both read and write (Head and Tail) */ - RF_LockReqDesc_t *waitersT; - int nWriters; /* number of writers either granted or waiting */ - RF_StripeLockDesc_t *next; /* for hash table collision resolution */ -}; - -struct RF_LockTableEntry_s { - RF_DECLARE_MUTEX(mutex) /* mutex on this hash chain */ - RF_StripeLockDesc_t *descList; /* hash chain of lock descriptors */ -}; -/* - * Initializes a stripe lock descriptor. _defSize is the number of sectors - * that we lock when there is no parity information in the ASM (e.g. RAID0). - */ - -#define RF_INIT_LOCK_REQ_DESC(_lrd, _typ, _cbf, _cba, _asm, _defSize) \ - { \ - (_lrd).type = _typ; \ - (_lrd).start2 = -1; \ - (_lrd).stop2 = -1; \ - if ((_asm)->parityInfo) { \ - (_lrd).start = (_asm)->parityInfo->startSector; \ - (_lrd).stop = (_asm)->parityInfo->startSector + (_asm)->parityInfo->numSector-1; \ - if ((_asm)->parityInfo->next) { \ - (_lrd).start2 = (_asm)->parityInfo->next->startSector; \ - (_lrd).stop2 = (_asm)->parityInfo->next->startSector + (_asm)->parityInfo->next->numSector-1; \ - } \ - } else { \ - (_lrd).start = 0; \ - (_lrd).stop = (_defSize); \ - } \ - (_lrd).templink= NULL; \ - (_lrd).cbFunc = (_cbf); \ - (_lrd).cbArg = (void *) (_cba); \ - } - -int rf_ConfigureStripeLockFreeList(RF_ShutdownList_t ** listp); -RF_LockTableEntry_t *rf_MakeLockTable(void); -void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable); -int -rf_ConfigureStripeLocks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_AcquireStripeLock(RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc); -void -rf_ReleaseStripeLock(RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc); - -#endif /* !_RF__RF_STRIPELOCKS_H_ */ diff --git a/sys/dev/raidframe/rf_strutils.c b/sys/dev/raidframe/rf_strutils.c deleted file mode 100644 index d434f0a..0000000 --- a/sys/dev/raidframe/rf_strutils.c +++ /dev/null @@ -1,58 +0,0 @@ -/* $NetBSD: rf_strutils.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_strutils.c - * - * String-parsing funcs - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/* - * rf_strutils.c -- some simple utilities for munging on strings. - * I put them in a file by themselves because they're needed in - * setconfig, in the user-level driver, and in the kernel. - * - */ - -#include <dev/raidframe/rf_utils.h> - -/* finds a non-white character in the line */ -char * -rf_find_non_white(char *p) -{ - for (; *p != '\0' && (*p == ' ' || *p == '\t'); p++); - return (p); -} -/* finds a white character in the line */ -char * -rf_find_white(char *p) -{ - for (; *p != '\0' && (*p != ' ' && *p != '\t'); p++); - return (p); -} diff --git a/sys/dev/raidframe/rf_threadstuff.c b/sys/dev/raidframe/rf_threadstuff.c deleted file mode 100644 index 657ffee..0000000 --- a/sys/dev/raidframe/rf_threadstuff.c +++ /dev/null @@ -1,223 +0,0 @@ -/* $NetBSD: rf_threadstuff.c,v 1.5 1999/12/07 02:13:28 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * rf_threadstuff.c - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_threadstuff.h> -#include <dev/raidframe/rf_general.h> -#include <dev/raidframe/rf_shutdown.h> - -static void mutex_destroyer(void *); -static void cond_destroyer(void *); - -/* - * Shared stuff - */ - -static void -mutex_destroyer(arg) - void *arg; -{ - int rc; - - rc = rf_mutex_destroy(arg); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying mutex\n", rc); - } -} - -static void -cond_destroyer(arg) - void *arg; -{ - int rc; - - rc = rf_cond_destroy(arg); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying condition\n", rc); - } -} - -int -_rf_create_managed_mutex(listp, m, file, line) - RF_ShutdownList_t **listp; -RF_DECLARE_MUTEX(*m) - char *file; - int line; -{ - int rc, rc1; - - rc = rf_mutex_init(m, __FUNCTION__); - if (rc) - return (rc); - rc = _rf_ShutdownCreate(listp, mutex_destroyer, (void *) m, file, line); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); - rc1 = rf_mutex_destroy(m); - if (rc1) { - RF_ERRORMSG1("RAIDFRAME: Error %d destroying mutex\n", rc1); - } - } - return (rc); -} - -int -_rf_create_managed_cond(listp, c, file, line) - RF_ShutdownList_t **listp; -RF_DECLARE_COND(*c) - char *file; - int line; -{ - int rc, rc1; - - rc = rf_cond_init(c); - if (rc) - return (rc); - rc = _rf_ShutdownCreate(listp, cond_destroyer, (void *) c, file, line); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); - rc1 = rf_cond_destroy(c); - if (rc1) { - RF_ERRORMSG1("RAIDFRAME: Error %d destroying cond\n", rc1); - } - } - return (rc); -} - -int -_rf_init_managed_threadgroup(listp, g, file, line) - RF_ShutdownList_t **listp; - RF_ThreadGroup_t *g; - char *file; - int line; -{ - int rc; - - rc = _rf_create_managed_mutex(listp, &g->mutex, file, line); - if (rc) - return (rc); - rc = _rf_create_managed_cond(listp, &g->cond, file, line); - if (rc) - return (rc); - g->created = g->running = g->shutdown = 0; - return (0); -} - -int -_rf_destroy_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; -{ - int rc1, rc2; - - rc1 = rf_mutex_destroy(&g->mutex); - rc2 = rf_cond_destroy(&g->cond); - if (rc1) - return (rc1); - return (rc2); -} - -int -_rf_init_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; -{ - int rc; - - rc = rf_mutex_init(&g->mutex, __FUNCTION__); - if (rc) - return (rc); - rc = rf_cond_init(&g->cond); - if (rc) { - rf_mutex_destroy(&g->mutex); - return (rc); - } - g->created = g->running = g->shutdown = 0; - return (0); -} - - -/* - * Kernel - */ -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -int -rf_mutex_init(m, s) -decl_simple_lock_data(, *m) -const char *s; -{ - mtx_init(m, s, NULL, MTX_DEF); - return (0); -} - -int -rf_mutex_destroy(m) -decl_simple_lock_data(, *m) -{ - mtx_destroy(m); - return (0); -} -#else -int -rf_mutex_init(m, s) -decl_simple_lock_data(, *m) -const char *s; -{ - simple_lock_init(m); - return (0); -} - -int -rf_mutex_destroy(m) -decl_simple_lock_data(, *m) -{ - return (0); -} -#endif - -int -rf_cond_init(c) -RF_DECLARE_COND(*c) -{ - *c = 0; /* no reason */ - return (0); -} - -int -rf_cond_destroy(c) -RF_DECLARE_COND(*c) -{ - return (0); -} diff --git a/sys/dev/raidframe/rf_threadstuff.h b/sys/dev/raidframe/rf_threadstuff.h deleted file mode 100644 index a3560cc..0000000 --- a/sys/dev/raidframe/rf_threadstuff.h +++ /dev/null @@ -1,229 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_threadstuff.h,v 1.10 2001/01/27 20:42:21 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Daniel Stodolsky, Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * threadstuff.h -- definitions for threads, locks, and synchronization - * - * The purpose of this file is provide some illusion of portability. - * If the functions below can be implemented with the same semantics on - * some new system, then at least the synchronization and thread control - * part of the code should not require modification to port to a new machine. - * the only other place where the pthread package is explicitly used is - * threadid.h - * - * this file should be included above stdio.h to get some necessary defines. - * - */ - -#ifndef _RF__RF_THREADSTUFF_H_ -#define _RF__RF_THREADSTUFF_H_ - -#include <dev/raidframe/rf_types.h> -#include <sys/types.h> -#include <sys/param.h> -#ifdef _KERNEL -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/kthread.h> -#endif - -#define rf_create_managed_mutex(a,b) _rf_create_managed_mutex(a,b,__FILE__,__LINE__) -#define rf_create_managed_cond(a,b) _rf_create_managed_cond(a,b,__FILE__,__LINE__) -#define rf_init_managed_threadgroup(a,b) _rf_init_managed_threadgroup(a,b,__FILE__,__LINE__) -#define rf_init_threadgroup(a) _rf_init_threadgroup(a,__FILE__,__LINE__) -#define rf_destroy_threadgroup(a) _rf_destroy_threadgroup(a,__FILE__,__LINE__) - -int _rf_init_threadgroup(RF_ThreadGroup_t * g, char *file, int line); -int _rf_destroy_threadgroup(RF_ThreadGroup_t * g, char *file, int line); -int -_rf_init_managed_threadgroup(RF_ShutdownList_t ** listp, - RF_ThreadGroup_t * g, char *file, int line); - -#include <sys/lock.h> -#if defined(__FreeBSD__ ) && __FreeBSD_version > 500005 -#include <sys/mutex.h> -#define decl_simple_lock_data(a,b) a struct mtx b; -#define simple_lock_addr(a) ((struct mtx *)&(a)) - -typedef struct thread *RF_Thread_t; -typedef void *RF_ThreadArg_t; - -#ifdef _KERNEL -static __inline struct ucred * -rf_getucred(RF_Thread_t td) -{ - return (((struct thread *)td)->td_ucred); -} -#endif - -#define RF_LOCK_MUTEX(_m_) mtx_lock(&(_m_)) -#define RF_UNLOCK_MUTEX(_m_) mtx_unlock(&(_m_)) -#else -#define decl_simple_lock_data(a,b) a struct simplelock b; -#define simple_lock_addr(a) ((struct simplelock *)&(a)) - -typedef struct proc *RF_Thread_t; -typedef void *RF_ThreadArg_t; - -static __inline struct ucred * -rf_getucred(RF_Thread_t td) -{ - return (((struct proc *)td)->p_ucred); -} - -#define RF_LOCK_MUTEX(_m_) simple_lock(&(_m_)) -#define RF_UNLOCK_MUTEX(_m_) simple_unlock(&(_m_)) -#endif - -#define RF_DECLARE_MUTEX(_m_) decl_simple_lock_data(,(_m_)) -#define RF_DECLARE_STATIC_MUTEX(_m_) decl_simple_lock_data(static,(_m_)) -#define RF_DECLARE_EXTERN_MUTEX(_m_) decl_simple_lock_data(extern,(_m_)) - -#define RF_DECLARE_COND(_c_) int _c_; -#define RF_DECLARE_STATIC_COND(_c_) static int _c_; -#define RF_DECLARE_EXTERN_COND(_c_) extern int _c_; - -/* - * In NetBSD, kernel threads are simply processes which share several - * substructures and never run in userspace. - */ -#define RF_WAIT_COND(_c_,_m_) \ - RF_LTSLEEP(&(_c_), PRIBIO, "rfwcond", 0, &(_m_)) -#define RF_SIGNAL_COND(_c_) wakeup_one(&(_c_)) -#define RF_BROADCAST_COND(_c_) wakeup(&(_c_)) -#if defined(__NetBSD__) -#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ - kthread_create1((void (*)(void *))(_func_), (void *)(_arg_), \ - (struct proc **)&(_handle_), _name_) -#define RF_THREAD_EXIT(ret) \ - kthread_exit(ret) -#elif defined(__FreeBSD__) -#if __FreeBSD_version > 500005 -#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ - kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \ - (struct proc **)&(_handle_), 0, 4, _name_) -#define RF_THREAD_EXIT(ret) \ - kthread_exit(ret) -#else -#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ - kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \ - (struct proc **)&(_handle_), _name_) -#define RF_THREAD_EXIT(ret) \ - kthread_exit(ret); -#endif -#endif - -struct RF_ThreadGroup_s { - int created; - int running; - int shutdown; - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) -}; -/* - * Someone has started a thread in the group - */ -#define RF_THREADGROUP_STARTED(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->created++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} - -/* - * Thread announcing that it is now running - */ -#define RF_THREADGROUP_RUNNING(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->running++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ - RF_SIGNAL_COND((_g_)->cond); \ -} - -/* - * Thread announcing that it is now done - */ -#define RF_THREADGROUP_DONE(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->shutdown++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ - RF_SIGNAL_COND((_g_)->cond); \ -} - -/* - * Wait for all threads to start running - */ -#define RF_THREADGROUP_WAIT_START(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - while((_g_)->running < (_g_)->created) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} - -/* - * Wait for all threads to stop running - */ -#ifndef __NetBSD__ -#define RF_THREADGROUP_WAIT_STOP(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - RF_ASSERT((_g_)->running == (_g_)->created); \ - while((_g_)->shutdown < (_g_)->running) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} -#else - /* XXX Note that we've removed the assert. That should get put back in once - * we actually get something like a kernel thread running */ -#define RF_THREADGROUP_WAIT_STOP(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - while((_g_)->shutdown < (_g_)->running) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} -#endif - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -int rf_mutex_init(struct mtx *, const char *); -int rf_mutex_destroy(struct mtx *); -int _rf_create_managed_mutex(RF_ShutdownList_t **, struct mtx *, - char *, int); -#else -int rf_mutex_init(struct simplelock *, const char *); -int rf_mutex_destroy(struct simplelock *); -int _rf_create_managed_mutex(RF_ShutdownList_t **, struct simplelock *, - char *, int); -#endif -int _rf_create_managed_cond(RF_ShutdownList_t ** listp, int *, - char *file, int line); - -int rf_cond_init(int *c); -int rf_cond_destroy(int *c); -#endif /* !_RF__RF_THREADSTUFF_H_ */ diff --git a/sys/dev/raidframe/rf_types.h b/sys/dev/raidframe/rf_types.h deleted file mode 100644 index 25630ef..0000000 --- a/sys/dev/raidframe/rf_types.h +++ /dev/null @@ -1,247 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_types.h,v 1.6 1999/09/05 03:05:55 oster Exp $ */ -/* - * rf_types.h - */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Jim Zelenka - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ -/*********************************************************** - * - * rf_types.h -- standard types for RAIDframe - * - ***********************************************************/ - -#ifndef _RF__RF_TYPES_H_ -#define _RF__RF_TYPES_H_ - - -#include <dev/raidframe/rf_archs.h> - -#include <sys/errno.h> -#include <sys/types.h> - -#include <sys/uio.h> -#include <sys/param.h> -#ifdef _KERNEL -#include <sys/lock.h> -#endif - -/* - * First, define system-dependent types and constants. - * - * If the machine is big-endian, RF_BIG_ENDIAN should be 1. - * Otherwise, it should be 0. - * - * The various integer types should be self-explanatory; we - * use these elsewhere to avoid size confusion. - * - * LONGSHIFT is lg(sizeof(long)) (that is, log base two of sizeof(long) - * - */ - -#include <sys/types.h> -#include <sys/limits.h> -#include <machine/endian.h> - -#if BYTE_ORDER == BIG_ENDIAN -#define RF_IS_BIG_ENDIAN 1 -#elif BYTE_ORDER == LITTLE_ENDIAN -#define RF_IS_BIG_ENDIAN 0 -#else -#error byte order not defined -#endif -typedef int8_t RF_int8; -typedef u_int8_t RF_uint8; -typedef int16_t RF_int16; -typedef u_int16_t RF_uint16; -typedef int32_t RF_int32; -typedef u_int32_t RF_uint32; -typedef int64_t RF_int64; -typedef u_int64_t RF_uint64; -#if LONG_BIT == 32 -#define RF_LONGSHIFT 2 -#elif LONG_BIT == 64 -#define RF_LONGSHIFT 3 -#elif defined(__i386__) -#define RF_LONGSHIFT 2 -#elif defined(__alpha__) -#define RF_LONGSHIFT 3 -#else -#error word size not defined -#endif - -/* - * These are just zero and non-zero. We don't use "TRUE" - * and "FALSE" because there's too much nonsense trying - * to get them defined exactly once on every platform, given - * the different places they may be defined in system header - * files. - */ -#define RF_TRUE 1 -#define RF_FALSE 0 - -/* - * Now, some generic types - */ -typedef RF_uint64 RF_IoCount_t; -typedef RF_uint64 RF_Offset_t; -typedef RF_uint32 RF_PSSFlags_t; -typedef RF_uint64 RF_SectorCount_t; -typedef RF_uint64 RF_StripeCount_t; -typedef RF_int64 RF_SectorNum_t;/* these are unsigned so we can set them to - * (-1) for "uninitialized" */ -typedef RF_int64 RF_StripeNum_t; -typedef RF_int64 RF_RaidAddr_t; -typedef int RF_RowCol_t; /* unsigned so it can be (-1) */ -typedef RF_int64 RF_HeadSepLimit_t; -typedef RF_int64 RF_ReconUnitCount_t; -typedef int RF_ReconUnitNum_t; - -typedef char RF_ParityConfig_t; - -typedef char RF_DiskQueueType_t[1024]; -#define RF_DISK_QUEUE_TYPE_NONE "" - -/* values for the 'type' field in a reconstruction buffer */ -typedef int RF_RbufType_t; -#define RF_RBUF_TYPE_EXCLUSIVE 0 /* this buf assigned exclusively to - * one disk */ -#define RF_RBUF_TYPE_FLOATING 1 /* this is a floating recon buf */ -#define RF_RBUF_TYPE_FORCED 2 /* this rbuf was allocated to complete - * a forced recon */ - -typedef char RF_IoType_t; -#define RF_IO_TYPE_READ 'r' -#define RF_IO_TYPE_WRITE 'w' -#define RF_IO_TYPE_NOP 'n' -#define RF_IO_IS_R_OR_W(_type_) (((_type_) == RF_IO_TYPE_READ) \ - || ((_type_) == RF_IO_TYPE_WRITE)) - -typedef void (*RF_VoidFuncPtr) (void *,...); - -typedef RF_uint32 RF_AccessStripeMapFlags_t; -typedef RF_uint32 RF_DiskQueueDataFlags_t; -typedef RF_uint32 RF_DiskQueueFlags_t; -typedef RF_uint32 RF_RaidAccessFlags_t; - -#define RF_DISKQUEUE_DATA_FLAGS_NONE ((RF_DiskQueueDataFlags_t)0) - -typedef struct RF_AccessStripeMap_s RF_AccessStripeMap_t; -typedef struct RF_AccessStripeMapHeader_s RF_AccessStripeMapHeader_t; -typedef struct RF_AllocListElem_s RF_AllocListElem_t; -typedef struct RF_CallbackDesc_s RF_CallbackDesc_t; -typedef struct RF_ChunkDesc_s RF_ChunkDesc_t; -typedef struct RF_CommonLogData_s RF_CommonLogData_t; -typedef struct RF_Config_s RF_Config_t; -typedef struct RF_CumulativeStats_s RF_CumulativeStats_t; -typedef struct RF_DagHeader_s RF_DagHeader_t; -typedef struct RF_DagList_s RF_DagList_t; -typedef struct RF_DagNode_s RF_DagNode_t; -typedef struct RF_DeclusteredConfigInfo_s RF_DeclusteredConfigInfo_t; -typedef struct RF_DiskId_s RF_DiskId_t; -typedef struct RF_DiskMap_s RF_DiskMap_t; -typedef struct RF_DiskQueue_s RF_DiskQueue_t; -typedef struct RF_DiskQueueData_s RF_DiskQueueData_t; -typedef struct RF_DiskQueueSW_s RF_DiskQueueSW_t; -typedef struct RF_Etimer_s RF_Etimer_t; -typedef struct RF_EventCreate_s RF_EventCreate_t; -typedef struct RF_FreeList_s RF_FreeList_t; -typedef struct RF_LockReqDesc_s RF_LockReqDesc_t; -typedef struct RF_LockTableEntry_s RF_LockTableEntry_t; -typedef struct RF_MCPair_s RF_MCPair_t; -typedef struct RF_OwnerInfo_s RF_OwnerInfo_t; -typedef struct RF_ParityLog_s RF_ParityLog_t; -typedef struct RF_ParityLogAppendQueue_s RF_ParityLogAppendQueue_t; -typedef struct RF_ParityLogData_s RF_ParityLogData_t; -typedef struct RF_ParityLogDiskQueue_s RF_ParityLogDiskQueue_t; -typedef struct RF_ParityLogQueue_s RF_ParityLogQueue_t; -typedef struct RF_ParityLogRecord_s RF_ParityLogRecord_t; -typedef struct RF_PerDiskReconCtrl_s RF_PerDiskReconCtrl_t; -typedef struct RF_PSStatusHeader_s RF_PSStatusHeader_t; -typedef struct RF_PhysDiskAddr_s RF_PhysDiskAddr_t; -typedef struct RF_PropHeader_s RF_PropHeader_t; -typedef struct RF_Raid_s RF_Raid_t; -typedef struct RF_RaidAccessDesc_s RF_RaidAccessDesc_t; -typedef struct RF_RaidDisk_s RF_RaidDisk_t; -typedef struct RF_RaidLayout_s RF_RaidLayout_t; -typedef struct RF_RaidReconDesc_s RF_RaidReconDesc_t; -typedef struct RF_ReconBuffer_s RF_ReconBuffer_t; -typedef struct RF_ReconConfig_s RF_ReconConfig_t; -typedef struct RF_ReconCtrl_s RF_ReconCtrl_t; -typedef struct RF_ReconDoneProc_s RF_ReconDoneProc_t; -typedef struct RF_ReconEvent_s RF_ReconEvent_t; -typedef struct RF_ReconMap_s RF_ReconMap_t; -typedef struct RF_ReconMapListElem_s RF_ReconMapListElem_t; -typedef struct RF_ReconParityStripeStatus_s RF_ReconParityStripeStatus_t; -typedef struct RF_RedFuncs_s RF_RedFuncs_t; -typedef struct RF_RegionBufferQueue_s RF_RegionBufferQueue_t; -typedef struct RF_RegionInfo_s RF_RegionInfo_t; -typedef struct RF_ShutdownList_s RF_ShutdownList_t; -typedef struct RF_SpareTableEntry_s RF_SpareTableEntry_t; -typedef struct RF_SparetWait_s RF_SparetWait_t; -typedef struct RF_StripeLockDesc_s RF_StripeLockDesc_t; -typedef struct RF_ThreadGroup_s RF_ThreadGroup_t; -typedef struct RF_ThroughputStats_s RF_ThroughputStats_t; - -/* - * Important assumptions regarding ordering of the states in this list - * have been made!!! - * Before disturbing this ordering, look at code in rf_states.c - */ -typedef enum RF_AccessState_e { - /* original states */ - rf_QuiesceState, /* handles queisence for reconstruction */ - rf_IncrAccessesCountState, /* count accesses in flight */ - rf_DecrAccessesCountState, - rf_MapState, /* map access to disk addresses */ - rf_LockState, /* take stripe locks */ - rf_CreateDAGState, /* create DAGs */ - rf_ExecuteDAGState, /* execute DAGs */ - rf_ProcessDAGState, /* DAGs are completing- check if correct, or - * if we need to retry */ - rf_CleanupState, /* release stripe locks, clean up */ - rf_LastState /* must be the last state */ -} RF_AccessState_t; -#define RF_MAXROW 10 /* these are arbitrary and can be modified at - * will */ -#define RF_MAXCOL 40 -#define RF_MAXSPARE 10 -#define RF_MAXDBGV 75 /* max number of debug variables */ - -union RF_GenericParam_u { - void *p; - RF_uint64 v; -}; -typedef union RF_GenericParam_u RF_DagParam_t; -typedef union RF_GenericParam_u RF_CBParam_t; - -#if defined(__FreeBSD__) && __FreeBSD_version > 500005 -typedef struct bio *RF_Buf_t; -#else -typedef struct buf *RF_Buf_t; -#endif -#endif /* _RF__RF_TYPES_H_ */ diff --git a/sys/dev/raidframe/rf_utils.c b/sys/dev/raidframe/rf_utils.c deleted file mode 100644 index 71f7b93..0000000 --- a/sys/dev/raidframe/rf_utils.c +++ /dev/null @@ -1,149 +0,0 @@ -/* $NetBSD: rf_utils.c,v 1.5 2000/01/07 03:41:03 oster Exp $ */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/**************************************** - * - * rf_utils.c -- various support routines - * - ****************************************/ - - -#include <dev/raidframe/rf_threadstuff.h> - -#include <sys/time.h> - -#include <dev/raidframe/rf_utils.h> -#include <dev/raidframe/rf_debugMem.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_general.h> - -/* creates & zeros 2-d array with b rows and k columns (MCH) */ -RF_RowCol_t ** -rf_make_2d_array(b, k, allocList) - int b; - int k; - RF_AllocListElem_t *allocList; -{ - RF_RowCol_t **retval, i; - - RF_MallocAndAdd(retval, b * sizeof(RF_RowCol_t *), (RF_RowCol_t **), allocList); - for (i = 0; i < b; i++) { - RF_MallocAndAdd(retval[i], k * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval[i], k * sizeof(RF_RowCol_t)); - } - return (retval); -} - -void -rf_free_2d_array(a, b, k) - RF_RowCol_t **a; - int b; - int k; -{ - RF_RowCol_t i; - - for (i = 0; i < b; i++) - RF_Free(a[i], k * sizeof(RF_RowCol_t)); - RF_Free(a, b * sizeof(RF_RowCol_t)); -} - - -/* creates & zeros a 1-d array with c columns */ -RF_RowCol_t * -rf_make_1d_array(c, allocList) - int c; - RF_AllocListElem_t *allocList; -{ - RF_RowCol_t *retval; - - RF_MallocAndAdd(retval, c * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval, c * sizeof(RF_RowCol_t)); - return (retval); -} - -void -rf_free_1d_array(a, n) - RF_RowCol_t *a; - int n; -{ - RF_Free(a, n * sizeof(RF_RowCol_t)); -} -/* Euclid's algorithm: finds and returns the greatest common divisor - * between a and b. (MCH) - */ -int -rf_gcd(m, n) - int m; - int n; -{ - int t; - - while (m > 0) { - t = n % m; - n = m; - m = t; - } - return (n); -} -/* these convert between text and integer. Apparently the regular C macros - * for doing this are not available in the kernel - */ - -#define ISDIGIT(x) ( (x) >= '0' && (x) <= '9' ) -#define ISHEXCHAR(x) ( ((x) >= 'a' && (x) <= 'f') || ((x) >= 'A' && (x) <= 'F') ) -#define ISHEX(x) ( ISDIGIT(x) || ISHEXCHAR(x) ) -#define HC2INT(x) ( ((x) >= 'a' && (x) <= 'f') ? (x) - 'a' + 10 : \ - ( ((x) >= 'A' && (x) <= 'F') ? (x) - 'A' + 10 : (x - '0') ) ) - -int -rf_atoi(p) - char *p; -{ - int val = 0, negate = 0; - - if (*p == '-') { - negate = 1; - p++; - } - for (; ISDIGIT(*p); p++) - val = 10 * val + (*p - '0'); - return ((negate) ? -val : val); -} - -int -rf_htoi(p) - char *p; -{ - int val = 0; - for (; ISHEXCHAR(*p); p++) - val = 16 * val + HC2INT(*p); - return (val); -} diff --git a/sys/dev/raidframe/rf_utils.h b/sys/dev/raidframe/rf_utils.h deleted file mode 100644 index 18eac84..0000000 --- a/sys/dev/raidframe/rf_utils.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD$ */ -/* $NetBSD: rf_utils.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/*************************************** - * - * rf_utils.c -- header file for utils.c - * - ***************************************/ - - -#ifndef _RF__RF_UTILS_H_ -#define _RF__RF_UTILS_H_ - -#include <dev/raidframe/rf_types.h> -#include <dev/raidframe/rf_alloclist.h> -#include <dev/raidframe/rf_threadstuff.h> - -char *rf_find_non_white(char *p); -char *rf_find_white(char *p); -RF_RowCol_t **rf_make_2d_array(int b, int k, RF_AllocListElem_t * allocList); -RF_RowCol_t *rf_make_1d_array(int c, RF_AllocListElem_t * allocList); -void rf_free_2d_array(RF_RowCol_t ** a, int b, int k); -void rf_free_1d_array(RF_RowCol_t * a, int n); -int rf_gcd(int m, int n); -int rf_atoi(char *p); -int rf_htoi(char *p); - -#define RF_USEC_PER_SEC 1000000 -#define RF_TIMEVAL_TO_US(_t_) (((_t_).tv_sec) \ - * RF_USEC_PER_SEC + (_t_).tv_usec) - -#define RF_TIMEVAL_DIFF(_start_,_end_,_diff_) { \ - if ((_end_)->tv_usec < (_start_)->tv_usec) { \ - (_diff_)->tv_usec = ((_end_)->tv_usec + RF_USEC_PER_SEC) \ - - (_start_)->tv_usec; \ - (_diff_)->tv_sec = ((_end_)->tv_sec-1) - (_start_)->tv_sec; \ - } \ - else { \ - (_diff_)->tv_usec = (_end_)->tv_usec - (_start_)->tv_usec; \ - (_diff_)->tv_sec = (_end_)->tv_sec - (_start_)->tv_sec; \ - } \ -} - -#endif /* !_RF__RF_UTILS_H_ */ diff --git a/sys/modules/raidframe/Makefile b/sys/modules/raidframe/Makefile deleted file mode 100644 index 74256f1..0000000 --- a/sys/modules/raidframe/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -# $FreeBSD$ - -.PATH: ${.CURDIR}/../../dev/raidframe - -KMOD= raidframe -SRCS= rf_acctrace.c rf_alloclist.c rf_aselect.c rf_callback.c \ - rf_chaindecluster.c rf_copyback.c rf_cvscan.c rf_dagdegrd.c \ - rf_dagdegwr.c rf_dagffrd.c rf_dagffwr.c rf_dagfuncs.c rf_dagutils.c \ - rf_debugMem.c rf_debugprint.c rf_decluster.c rf_declusterPQ.c \ - rf_diskqueue.c rf_disks.c rf_driver.c rf_engine.c rf_evenodd.c \ - rf_evenodd_dagfuncs.c rf_evenodd_dags.c rf_fifo.c rf_interdecluster.c \ - rf_invertq.c rf_layout.c rf_map.c rf_mcpair.c rf_memchunk.c \ - rf_nwayxor.c rf_options.c rf_paritylog.c rf_paritylogDiskMgr.c \ - rf_paritylogging.c rf_parityloggingdags.c rf_parityscan.c rf_pq.c \ - rf_pqdeg.c rf_pqdegdags.c rf_psstatus.c rf_raid0.c rf_raid1.c \ - rf_raid4.c rf_raid5.c rf_raid5_rotatedspare.c rf_reconbuffer.c \ - rf_reconmap.c rf_reconstruct.c rf_reconutil.c rf_revent.c \ - rf_shutdown.c rf_sstf.c rf_states.c rf_stripelocks.c rf_strutils.c \ - rf_threadstuff.c rf_utils.c rf_freebsdkintf.c \ - opt_raid.h vnode_if.h -RF_AUTOCONFIG?= 1 -RF_DEBUG?= 0 - -opt_raid.h: - echo "#define RAID_AUTOCONFIG ${RF_AUTOCONFIG}" > ${.TARGET} - echo "#define RAID_DEBUG ${RF_DEBUG}" >> ${.TARGET} - -.include <bsd.kmod.mk> |