Diffstat (limited to 'sys/dev/raidframe/rf_paritylog.c')
-rw-r--r--  sys/dev/raidframe/rf_paritylog.c  869
1 file changed, 869 insertions, 0 deletions
diff --git a/sys/dev/raidframe/rf_paritylog.c b/sys/dev/raidframe/rf_paritylog.c
new file mode 100644
index 0000000..6c56c95
--- /dev/null
+++ b/sys/dev/raidframe/rf_paritylog.c
@@ -0,0 +1,869 @@
+/* $FreeBSD$ */
+/* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */
+/*
+ * Copyright (c) 1995 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: William V. Courtright II
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/* Code for manipulating in-core parity logs
+ *
+ */
+
+#include <dev/raidframe/rf_archs.h>
+
+#if RF_INCLUDE_PARITYLOGGING > 0
+
+/*
+ * Append-only log for recording parity "update" and "overwrite" records
+ */
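+
+/*
+ * Parity to be logged arrives as a list of RF_ParityLogData_t items
+ * (rf_CreateParityLogData) and is appended, one sector at a time, to a
+ * per-region in-core log (RF_ParityLog_t) by rf_ParityLogAppend.  Full
+ * core logs are queued for the parity logging disk manager, which either
+ * flushes them to the log disk or, once a region's log disk space is
+ * exhausted, schedules reintegration of that region.
+ */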
+
+#include <dev/raidframe/rf_types.h>
+#include <dev/raidframe/rf_threadstuff.h>
+#include <dev/raidframe/rf_mcpair.h>
+#include <dev/raidframe/rf_raid.h>
+#include <dev/raidframe/rf_dag.h>
+#include <dev/raidframe/rf_dagfuncs.h>
+#include <dev/raidframe/rf_desc.h>
+#include <dev/raidframe/rf_layout.h>
+#include <dev/raidframe/rf_diskqueue.h>
+#include <dev/raidframe/rf_etimer.h>
+#include <dev/raidframe/rf_paritylog.h>
+#include <dev/raidframe/rf_general.h>
+#include <dev/raidframe/rf_map.h>
+#include <dev/raidframe/rf_paritylogging.h>
+#include <dev/raidframe/rf_paritylogDiskMgr.h>
+
+static RF_CommonLogData_t *
+AllocParityLogCommonData(RF_Raid_t * raidPtr)
+{
+ RF_CommonLogData_t *common = NULL;
+ int rc;
+
+ /* Return a struct for holding common parity log information from the
+ * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
+ * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
+
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ if (raidPtr->parityLogDiskQueue.freeCommonList) {
+ common = raidPtr->parityLogDiskQueue.freeCommonList;
+ raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ } else {
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
+ rc = rf_mutex_init(&common->mutex, __FUNCTION__);
+ if (rc) {
+ RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
+ __LINE__, rc);
+ RF_Free(common, sizeof(RF_CommonLogData_t));
+ common = NULL;
+ }
+ }
+	if (common)
+		common->next = NULL;
+ return (common);
+}
+
+static void
+FreeParityLogCommonData(RF_CommonLogData_t * common)
+{
+ RF_Raid_t *raidPtr;
+
+	/* Insert a single struct for holding common parity log information
+	 * (common) into the free list (rf_parityLogDiskQueue.freeCommonList).
+	 * NON-BLOCKING */
+
+ raidPtr = common->raidPtr;
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ common->next = raidPtr->parityLogDiskQueue.freeCommonList;
+ raidPtr->parityLogDiskQueue.freeCommonList = common;
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+}
+
+static RF_ParityLogData_t *
+AllocParityLogData(RF_Raid_t * raidPtr)
+{
+ RF_ParityLogData_t *data = NULL;
+
+	/* Return a struct for holding parity log information from the free
+	 * list (rf_parityLogDiskQueue.freeDataList). If the free list is
+	 * empty, call RF_Malloc to create a new structure. NON-BLOCKING */
+
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ if (raidPtr->parityLogDiskQueue.freeDataList) {
+ data = raidPtr->parityLogDiskQueue.freeDataList;
+ raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ } else {
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
+ }
+ data->next = NULL;
+ data->prev = NULL;
+ return (data);
+}
+
+
+static void
+FreeParityLogData(RF_ParityLogData_t * data)
+{
+ RF_ParityLogData_t *nextItem;
+ RF_Raid_t *raidPtr;
+
+	/* Insert a linked list of structs for holding parity log information
+	 * (data) into the free list (parityLogDiskQueue.freeDataList).
+	 * NON-BLOCKING */
+
+ raidPtr = data->common->raidPtr;
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ while (data) {
+ nextItem = data->next;
+ data->next = raidPtr->parityLogDiskQueue.freeDataList;
+ raidPtr->parityLogDiskQueue.freeDataList = data;
+ data = nextItem;
+ }
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+}
+
+
+static void
+EnqueueParityLogData(
+ RF_ParityLogData_t * data,
+ RF_ParityLogData_t ** head,
+ RF_ParityLogData_t ** tail)
+{
+ RF_Raid_t *raidPtr;
+
+ /* Insert an in-core parity log (*data) into the head of a disk queue
+ * (*head, *tail). NON-BLOCKING */
+
+ raidPtr = data->common->raidPtr;
+ if (rf_parityLogDebug)
+ printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
+ RF_ASSERT(data->prev == NULL);
+ RF_ASSERT(data->next == NULL);
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ if (*head) {
+ /* insert into head of queue */
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ data->next = *head;
+ (*head)->prev = data;
+ *head = data;
+ } else {
+ /* insert into empty list */
+ RF_ASSERT(*head == NULL);
+ RF_ASSERT(*tail == NULL);
+ *head = data;
+ *tail = data;
+ }
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+}
+
+static RF_ParityLogData_t *
+DequeueParityLogData(
+ RF_Raid_t * raidPtr,
+ RF_ParityLogData_t ** head,
+ RF_ParityLogData_t ** tail,
+ int ignoreLocks)
+{
+ RF_ParityLogData_t *data;
+
+ /* Remove and return an in-core parity log from the tail of a disk
+ * queue (*head, *tail). NON-BLOCKING */
+
+ /* remove from tail, preserving FIFO order */
+ if (!ignoreLocks)
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ data = *tail;
+ if (data) {
+ if (*head == *tail) {
+ /* removing last item from queue */
+ *head = NULL;
+ *tail = NULL;
+ } else {
+ *tail = (*tail)->prev;
+ (*tail)->next = NULL;
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ }
+ data->next = NULL;
+ data->prev = NULL;
+ if (rf_parityLogDebug)
+ printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
+ }
+ if (*head) {
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ }
+ if (!ignoreLocks)
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ return (data);
+}
+
+
+static void
+RequeueParityLogData(
+ RF_ParityLogData_t * data,
+ RF_ParityLogData_t ** head,
+ RF_ParityLogData_t ** tail)
+{
+ RF_Raid_t *raidPtr;
+
+ /* Insert an in-core parity log (*data) into the tail of a disk queue
+ * (*head, *tail). NON-BLOCKING */
+
+ raidPtr = data->common->raidPtr;
+ RF_ASSERT(data);
+ if (rf_parityLogDebug)
+ printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ if (*tail) {
+ /* append to tail of list */
+ data->prev = *tail;
+ data->next = NULL;
+ (*tail)->next = data;
+ *tail = data;
+ } else {
+ /* inserting into an empty list */
+ *head = data;
+ *tail = data;
+ (*head)->prev = NULL;
+ (*tail)->next = NULL;
+ }
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+}
+
+RF_ParityLogData_t *
+rf_CreateParityLogData(
+ RF_ParityRecordType_t operation,
+ RF_PhysDiskAddr_t * pda,
+ caddr_t bufPtr,
+ RF_Raid_t * raidPtr,
+ int (*wakeFunc) (RF_DagNode_t * node, int status),
+ void *wakeArg,
+ RF_AccTraceEntry_t * tracerec,
+ RF_Etimer_t startTime)
+{
+ RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
+ RF_CommonLogData_t *common;
+ RF_PhysDiskAddr_t *diskAddress;
+ int boundary, offset = 0;
+
+	/* Return an initialized list of structs describing the info to be
+	 * logged. At least one item is built per physical disk address; an
+	 * address that crosses a region boundary is split into one item per
+	 * region.
+	 *
+	 * NON-BLOCKING */
+
+ diskAddress = pda;
+ common = AllocParityLogCommonData(raidPtr);
+ RF_ASSERT(common);
+
+ common->operation = operation;
+ common->bufPtr = bufPtr;
+ common->raidPtr = raidPtr;
+ common->wakeFunc = wakeFunc;
+ common->wakeArg = wakeArg;
+ common->tracerec = tracerec;
+ common->startTime = startTime;
+ common->cnt = 0;
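+	/* cnt counts the logData items built for this I/O; it is decremented
+	 * in rf_ParityLogAppend and the wakeup function fires when it reaches
+	 * zero */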
+
+ if (rf_parityLogDebug)
+ printf("[entering CreateParityLogData]\n");
+ while (diskAddress) {
+ common->cnt++;
+ data = AllocParityLogData(raidPtr);
+ RF_ASSERT(data);
+ data->common = common;
+ data->next = NULL;
+ data->prev = NULL;
+ data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
+ if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
+ /* disk address does not cross a region boundary */
+ data->diskAddress = *diskAddress;
+ data->bufOffset = offset;
+ offset = offset + diskAddress->numSector;
+ EnqueueParityLogData(data, &resultHead, &resultTail);
+ /* adjust disk address */
+ diskAddress = diskAddress->next;
+ } else {
+ /* disk address crosses a region boundary */
+ /* find address where region is crossed */
+ boundary = 0;
+ while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
+ boundary++;
+
+ /* enter data before the boundary */
+ data->diskAddress = *diskAddress;
+ data->diskAddress.numSector = boundary;
+ data->bufOffset = offset;
+ offset += boundary;
+ EnqueueParityLogData(data, &resultHead, &resultTail);
+ /* adjust disk address */
+ diskAddress->startSector += boundary;
+ diskAddress->numSector -= boundary;
+ }
+ }
+ if (rf_parityLogDebug)
+ printf("[leaving CreateParityLogData]\n");
+ return (resultHead);
+}
+
+
+RF_ParityLogData_t *
+rf_SearchAndDequeueParityLogData(
+ RF_Raid_t * raidPtr,
+ int regionID,
+ RF_ParityLogData_t ** head,
+ RF_ParityLogData_t ** tail,
+ int ignoreLocks)
+{
+ RF_ParityLogData_t *w;
+
+	/* Remove and return an in-core parity log data item queued for the
+	 * specified region (regionID). If no matching item is found, return
+	 * NULL.
+	 *
+	 * NON-BLOCKING. */
+
+ /* walk backward through a list, looking for an entry with a matching
+ * region ID */
+ if (!ignoreLocks)
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ w = (*tail);
+ while (w) {
+ if (w->regionID == regionID) {
+ /* remove an element from the list */
+ if (w == *tail) {
+ if (*head == *tail) {
+ /* removing only element in the list */
+ *head = NULL;
+ *tail = NULL;
+ } else {
+ /* removing last item in the list */
+ *tail = (*tail)->prev;
+ (*tail)->next = NULL;
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ }
+ } else {
+ if (w == *head) {
+ /* removing first item in the list */
+ *head = (*head)->next;
+ (*head)->prev = NULL;
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ } else {
+ /* removing an item from the middle of
+ * the list */
+ w->prev->next = w->next;
+ w->next->prev = w->prev;
+ RF_ASSERT((*head)->prev == NULL);
+ RF_ASSERT((*tail)->next == NULL);
+ }
+ }
+ w->prev = NULL;
+ w->next = NULL;
+ if (rf_parityLogDebug)
+ printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
+			if (!ignoreLocks)
+				RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+			return (w);
+ } else
+ w = w->prev;
+ }
+ if (!ignoreLocks)
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ return (NULL);
+}
+
+static RF_ParityLogData_t *
+DequeueMatchingLogData(
+ RF_Raid_t * raidPtr,
+ RF_ParityLogData_t ** head,
+ RF_ParityLogData_t ** tail)
+{
+ RF_ParityLogData_t *logDataList, *logData;
+ int regionID;
+
+ /* Remove and return an in-core parity log from the tail of a disk
+ * queue (*head, *tail). Then remove all matching (identical
+ * regionIDs) logData and return as a linked list.
+ *
+ * NON-BLOCKING */
+
+ logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
+ if (logDataList) {
+ regionID = logDataList->regionID;
+ logData = logDataList;
+ logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
+ while (logData->next) {
+ logData = logData->next;
+ logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
+ }
+ }
+ return (logDataList);
+}
+
+
+static RF_ParityLog_t *
+AcquireParityLog(
+ RF_ParityLogData_t * logData,
+ int finish)
+{
+ RF_ParityLog_t *log = NULL;
+ RF_Raid_t *raidPtr;
+
+ /* Grab a log buffer from the pool and return it. If no buffers are
+ * available, return NULL. NON-BLOCKING */
+ raidPtr = logData->common->raidPtr;
+ RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
+ if (raidPtr->parityLogPool.parityLogs) {
+ log = raidPtr->parityLogPool.parityLogs;
+ raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
+ log->regionID = logData->regionID;
+ log->numRecords = 0;
+ log->next = NULL;
+ raidPtr->logsInUse++;
+ RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
+ } else {
+		/* No logs available, so place ourselves on the queue of work
+		 * waiting on log buffers. This is done while
+		 * parityLogPool.mutex is held, to ensure synchronization with
+		 * ReleaseParityLogs. */
+ if (rf_parityLogDebug)
+ printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
+ if (finish)
+ RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
+ else
+ EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
+ }
+ RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
+ return (log);
+}
+
+void
+rf_ReleaseParityLogs(
+ RF_Raid_t * raidPtr,
+ RF_ParityLog_t * firstLog)
+{
+ RF_ParityLogData_t *logDataList;
+ RF_ParityLog_t *log, *lastLog;
+ int cnt;
+
+	/* Insert a linked list of parity logs (firstLog) into the free list
+	 * (parityLogPool.parityLogs).
+ *
+ * NON-BLOCKING. */
+
+ RF_ASSERT(firstLog);
+
+ /* Before returning logs to global free list, service all requests
+ * which are blocked on logs. Holding mutexes for parityLogPool and
+ * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
+ RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
+ log = firstLog;
+ if (firstLog)
+ firstLog = firstLog->next;
+ log->numRecords = 0;
+ log->next = NULL;
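+	/* Hand freed logs, one at a time, to any log data blocked waiting for
+	 * a log buffer; rf_ParityLogAppend installs the log as a region's
+	 * core log and sets it to NULL once it has been consumed. */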
+ while (logDataList && log) {
+ RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
+ if (rf_parityLogDebug)
+ printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
+ if (log == NULL) {
+ log = firstLog;
+ if (firstLog) {
+ firstLog = firstLog->next;
+ log->numRecords = 0;
+ log->next = NULL;
+ }
+ }
+ RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ if (log)
+ logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
+ }
+ /* return remaining logs to pool */
+ if (log) {
+ log->next = firstLog;
+ firstLog = log;
+ }
+ if (firstLog) {
+ lastLog = firstLog;
+ raidPtr->logsInUse--;
+ RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
+ while (lastLog->next) {
+ lastLog = lastLog->next;
+ raidPtr->logsInUse--;
+ RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
+ }
+ lastLog->next = raidPtr->parityLogPool.parityLogs;
+ raidPtr->parityLogPool.parityLogs = firstLog;
+ cnt = 0;
+ log = raidPtr->parityLogPool.parityLogs;
+ while (log) {
+ cnt++;
+ log = log->next;
+ }
+ RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
+ }
+ RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+}
+
+static void
+ReintLog(
+ RF_Raid_t * raidPtr,
+ int regionID,
+ RF_ParityLog_t * log)
+{
+ RF_ASSERT(log);
+
+ /* Insert an in-core parity log (log) into the disk queue of
+ * reintegration work. Set the flag (reintInProgress) for the
+ * specified region (regionID) to indicate that reintegration is in
+ * progress for this region. NON-BLOCKING */
+
+ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
+	/* cleared when reintegration completes */
+	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;
+
+ if (rf_parityLogDebug)
+ printf("[requesting reintegration of region %d]\n", log->regionID);
+ /* move record to reintegration queue */
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ log->next = raidPtr->parityLogDiskQueue.reintQueue;
+ raidPtr->parityLogDiskQueue.reintQueue = log;
+ RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
+}
+
+static void
+FlushLog(
+ RF_Raid_t * raidPtr,
+ RF_ParityLog_t * log)
+{
+ /* insert a core log (log) into a list of logs
+ * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
+ * NON-BLOCKING */
+
+ RF_ASSERT(log);
+ RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
+ RF_ASSERT(log->next == NULL);
+ /* move log to flush queue */
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ log->next = raidPtr->parityLogDiskQueue.flushQueue;
+ raidPtr->parityLogDiskQueue.flushQueue = log;
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
+}
+
+static int
+DumpParityLogToDisk(
+ int finish,
+ RF_ParityLogData_t * logData)
+{
+ int i, diskCount, regionID = logData->regionID;
+ RF_ParityLog_t *log;
+ RF_Raid_t *raidPtr;
+
+ raidPtr = logData->common->raidPtr;
+
+ /* Move a core log to disk. If the log disk is full, initiate
+ * reintegration.
+ *
+ * Return (0) if we can enqueue the dump immediately, otherwise return
+ * (1) to indicate we are blocked on reintegration and control of the
+ * thread should be relinquished.
+ *
+ * Caller must hold regionInfo[regionID].mutex
+ *
+ * NON-BLOCKING */
+
+ if (rf_parityLogDebug)
+ printf("[dumping parity log to disk, region %d]\n", regionID);
+ log = raidPtr->regionInfo[regionID].coreLog;
+ RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
+ RF_ASSERT(log->next == NULL);
+
+ /* if reintegration is in progress, must queue work */
+ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
+ if (raidPtr->regionInfo[regionID].reintInProgress) {
+		/* Cannot proceed since this region is currently being
+		 * reintegrated. We cannot block, so queue the remaining work
+		 * and return. */
+ if (rf_parityLogDebug)
+ printf("[region %d waiting on reintegration]\n", regionID);
+ /* XXX not sure about the use of finish - shouldn't this
+ * always be "Enqueue"? */
+ if (finish)
+ RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
+ else
+ EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
+ RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
+		return (1);	/* relinquish control of this thread */
+ }
+ RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
+ raidPtr->regionInfo[regionID].coreLog = NULL;
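+	/* detach the full core log from this region; it is either flushed
+	 * to the log disk or queued for reintegration below */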
+	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) {
+		/* IMPORTANT!! this loop bound assumes the region disk holds
+		 * an integral number of core logs */
+ /* update disk map for this region */
+ diskCount = raidPtr->regionInfo[regionID].diskCount;
+ for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
+ raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
+ raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
+ }
+ log->diskOffset = diskCount;
+ raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
+ FlushLog(raidPtr, log);
+ } else {
+ /* no room for log on disk, send it to disk manager and
+ * request reintegration */
+ RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
+ ReintLog(raidPtr, regionID, log);
+ }
+ if (rf_parityLogDebug)
+ printf("[finished dumping parity log to disk, region %d]\n", regionID);
+ return (0);
+}
+
+int
+rf_ParityLogAppend(
+ RF_ParityLogData_t * logData,
+ int finish,
+ RF_ParityLog_t ** incomingLog,
+ int clearReintFlag)
+{
+ int regionID, logItem, itemDone;
+ RF_ParityLogData_t *item;
+ int punt, done = RF_FALSE;
+ RF_ParityLog_t *log;
+ RF_Raid_t *raidPtr;
+ RF_Etimer_t timer;
+ int (*wakeFunc) (RF_DagNode_t * node, int status);
+ void *wakeArg;
+
+	/* Add parity to the appropriate log, one sector at a time. This
+	 * routine is called by the dag functions ParityLogUpdateFunc and
+	 * ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
+ *
+ * Parity to be logged is contained in a linked-list (logData). When
+ * this routine returns, every sector in the list will be in one of
+ * three places: 1) entered into the parity log 2) queued, waiting on
+ * reintegration 3) queued, waiting on a core log
+ *
+ * Blocked work is passed to the ParityLoggingDiskManager for completion.
+ * Later, as conditions which required the block are removed, the work
+ * reenters this routine with the "finish" parameter set to "RF_TRUE."
+ *
+ * NON-BLOCKING */
+
+ raidPtr = logData->common->raidPtr;
+ /* lock the region for the first item in logData */
+ RF_ASSERT(logData != NULL);
+ regionID = logData->regionID;
+ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
+ RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
+
+ if (clearReintFlag) {
+ /* Enable flushing for this region. Holding both locks
+ * provides a synchronization barrier with DumpParityLogToDisk */
+ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
+ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
+ raidPtr->regionInfo[regionID].diskCount = 0;
+ raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
+ RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
+ * enabled */
+ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
+ }
+ /* process each item in logData */
+ while (logData) {
+ /* remove an item from logData */
+ item = logData;
+ logData = logData->next;
+ item->next = NULL;
+ item->prev = NULL;
+
+ if (rf_parityLogDebug)
+ printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
+
+ /* see if we moved to a new region */
+ if (regionID != item->regionID) {
+ RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
+ regionID = item->regionID;
+ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
+ RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
+ }
+		punt = RF_FALSE;	/* Set to RF_TRUE if work is blocked. This
+					 * can happen in one of two ways: 1) no core
+					 * log (AcquireParityLog) 2) waiting on
+					 * reintegration (DumpParityLogToDisk). If
+					 * punt is RF_TRUE, the data item was queued,
+					 * so skip to the next item. */
+
+ /* process item, one sector at a time, until all sectors
+ * processed or we punt */
+ if (item->diskAddress.numSector > 0)
+ done = RF_FALSE;
+ else
+ RF_ASSERT(0);
+ while (!punt && !done) {
+ /* verify that a core log exists for this region */
+ if (!raidPtr->regionInfo[regionID].coreLog) {
+ /* Attempt to acquire a parity log. If
+ * acquisition fails, queue remaining work in
+ * data item and move to nextItem. */
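+				/* prefer a log passed in by the caller
+				 * (incomingLog); otherwise try to grab one
+				 * from the global pool */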
+ if (incomingLog)
+ if (*incomingLog) {
+ RF_ASSERT((*incomingLog)->next == NULL);
+ raidPtr->regionInfo[regionID].coreLog = *incomingLog;
+ raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
+ *incomingLog = NULL;
+ } else
+ raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
+ else
+ raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
+ /* Note: AcquireParityLog either returns a log
+ * or enqueues currentItem */
+ }
+ if (!raidPtr->regionInfo[regionID].coreLog)
+ punt = RF_TRUE; /* failed to find a core log */
+ else {
+ RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
+ /* verify that the log has room for new
+ * entries */
+ /* if log is full, dump it to disk and grab a
+ * new log */
+ if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
+ /* log is full, dump it to disk */
+ if (DumpParityLogToDisk(finish, item))
+ punt = RF_TRUE; /* dump unsuccessful,
+ * blocked on
+ * reintegration */
+ else {
+ /* dump was successful */
+ if (incomingLog)
+ if (*incomingLog) {
+ RF_ASSERT((*incomingLog)->next == NULL);
+ raidPtr->regionInfo[regionID].coreLog = *incomingLog;
+ raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
+ *incomingLog = NULL;
+ } else
+ raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
+ else
+ raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
+ /* if a core log is not
+ * available, must queue work
+ * and return */
+ if (!raidPtr->regionInfo[regionID].coreLog)
+ punt = RF_TRUE; /* blocked on log
+ * availability */
+ }
+ }
+ }
+ /* if we didn't punt on this item, attempt to add a
+ * sector to the core log */
+ if (!punt) {
+ RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
+ /* at this point, we have a core log with
+ * enough room for a sector */
+ /* copy a sector into the log */
+ log = raidPtr->regionInfo[regionID].coreLog;
+ RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
+ logItem = log->numRecords++;
+ log->records[logItem].parityAddr = item->diskAddress;
+ RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
+ RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
+ log->records[logItem].parityAddr.numSector = 1;
+ log->records[logItem].operation = item->common->operation;
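+				/* bufOffset and logItem are sector indices;
+				 * logBytesPerSector is the log2 of the sector
+				 * size, so the shifts convert them to byte
+				 * offsets */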
+				bcopy((item->common->bufPtr +
+				    (item->bufOffset++ *
+				    (1 << item->common->raidPtr->logBytesPerSector))),
+				    log->bufPtr +
+				    (logItem * (1 << item->common->raidPtr->logBytesPerSector)),
+				    (1 << item->common->raidPtr->logBytesPerSector));
+ item->diskAddress.numSector--;
+ item->diskAddress.startSector++;
+ if (item->diskAddress.numSector == 0)
+ done = RF_TRUE;
+ }
+ }
+
+ if (!punt) {
+ /* Processed this item completely, decrement count of
+ * items to be processed. */
+ RF_ASSERT(item->diskAddress.numSector == 0);
+ RF_LOCK_MUTEX(item->common->mutex);
+ item->common->cnt--;
+ if (item->common->cnt == 0)
+ itemDone = RF_TRUE;
+ else
+ itemDone = RF_FALSE;
+ RF_UNLOCK_MUTEX(item->common->mutex);
+ if (itemDone) {
+				/* Finished processing all log data for this
+				 * I/O. Return structs to the free list and
+				 * invoke the wakeup function. */
+ timer = item->common->startTime; /* grab initial value of
+ * timer */
+ RF_ETIMER_STOP(timer);
+ RF_ETIMER_EVAL(timer);
+ item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
+ if (rf_parityLogDebug)
+ printf("[waking process for region %d]\n", item->regionID);
+ wakeFunc = item->common->wakeFunc;
+ wakeArg = item->common->wakeArg;
+ FreeParityLogCommonData(item->common);
+ FreeParityLogData(item);
+ (wakeFunc) (wakeArg, 0);
+ } else
+ FreeParityLogData(item);
+ }
+ }
+ RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
+ if (rf_parityLogDebug)
+ printf("[exiting ParityLogAppend]\n");
+ return (0);
+}
+
+
+void
+rf_EnableParityLogging(RF_Raid_t * raidPtr)
+{
+ int regionID;
+
+ for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
+ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
+ raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
+ RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
+ }
+ if (rf_parityLogDebug)
+ printf("[parity logging enabled]\n");
+}
+#endif /* RF_INCLUDE_PARITYLOGGING > 0 */