summaryrefslogtreecommitdiffstats
path: root/sys/dev/raidframe/rf_pq.c
diff options
context:
space:
mode:
author scottl <scottl@FreeBSD.org> 2002-10-20 08:17:39 +0000
committer scottl <scottl@FreeBSD.org> 2002-10-20 08:17:39 +0000
commit 710948de69ddeae56bda663219319f6d859aea1f (patch)
tree 71c65823ba2e8591de708d5cb2e990a75135ee11 /sys/dev/raidframe/rf_pq.c
parent 63bd46464d6d4587c20c1ca62fb6a6e3be132db9 (diff)
download FreeBSD-src-710948de69ddeae56bda663219319f6d859aea1f.zip
FreeBSD-src-710948de69ddeae56bda663219319f6d859aea1f.tar.gz
After much delay and anticipation, welcome RAIDFrame into the FreeBSD
world. This should be considered highly experimental. Approved-by: re
Diffstat (limited to 'sys/dev/raidframe/rf_pq.c')
-rw-r--r--sys/dev/raidframe/rf_pq.c926
1 files changed, 926 insertions, 0 deletions
diff --git a/sys/dev/raidframe/rf_pq.c b/sys/dev/raidframe/rf_pq.c
new file mode 100644
index 0000000..b96729e
--- /dev/null
+++ b/sys/dev/raidframe/rf_pq.c
@@ -0,0 +1,926 @@
+/* $FreeBSD$ */
+/* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */
+/*
+ * Copyright (c) 1995 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Daniel Stodolsky
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Code for RAID level 6 (P + Q) disk array architecture.
+ */
+
+#include <dev/raidframe/rf_archs.h>
+#include <dev/raidframe/rf_types.h>
+#include <dev/raidframe/rf_raid.h>
+#include <dev/raidframe/rf_dag.h>
+#include <dev/raidframe/rf_dagffrd.h>
+#include <dev/raidframe/rf_dagffwr.h>
+#include <dev/raidframe/rf_dagdegrd.h>
+#include <dev/raidframe/rf_dagdegwr.h>
+#include <dev/raidframe/rf_dagutils.h>
+#include <dev/raidframe/rf_dagfuncs.h>
+#include <dev/raidframe/rf_etimer.h>
+#include <dev/raidframe/rf_pqdeg.h>
+#include <dev/raidframe/rf_general.h>
+#include <dev/raidframe/rf_map.h>
+#include <dev/raidframe/rf_pq.h>
+
+RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
+RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
+
+int
+rf_RegularONPFunc(node)
+ RF_DagNode_t *node;
+{
+ return (rf_RegularXorFunc(node));
+}
+/*
+ same as simpleONQ func, but the coefficient is always 1
+*/
+
+int
+rf_SimpleONPFunc(node)
+ RF_DagNode_t *node;
+{
+ return (rf_SimpleXorFunc(node));
+}
+
+int
+rf_RecoveryPFunc(node)
+ RF_DagNode_t *node;
+{
+ return (rf_RecoveryXorFunc(node));
+}
+
+int
+rf_RegularPFunc(node)
+ RF_DagNode_t *node;
+{
+ return (rf_RegularXorFunc(node));
+}
+#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
+
+static void
+QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
+ unsigned char coeff);
+static void
+rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
+ unsigned length, unsigned coeff);
+
+RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
+RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
+RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
+
+void
+rf_PQDagSelect(
+ RF_Raid_t * raidPtr,
+ RF_IoType_t type,
+ RF_AccessStripeMap_t * asmap,
+ RF_VoidFuncPtr * createFunc)
+{
+ RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
+ unsigned ndfail = asmap->numDataFailed;
+ unsigned npfail = asmap->numParityFailed;
+ unsigned ntfail = npfail + ndfail;
+
+ RF_ASSERT(RF_IO_IS_R_OR_W(type));
+ if (ntfail > 2) {
+ RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
+ /* *infoFunc = */ *createFunc = NULL;
+ return;
+ }
+ /* ok, we can do this I/O */
+ if (type == RF_IO_TYPE_READ) {
+ switch (ndfail) {
+ case 0:
+ /* fault free read */
+ *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
+ break;
+ case 1:
+ /* lost a single data unit */
+ /* two cases: (1) parity is not lost. do a normal raid
+ * 5 reconstruct read. (2) parity is lost. do a
+ * reconstruct read using "q". */
+ if (ntfail == 2) { /* also lost redundancy */
+ if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
+ else
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
+ } else {
+ /* P and Q are ok. But is there a failure in
+ * some unaccessed data unit? */
+ if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
+ else
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
+ }
+ break;
+ case 2:
+ /* lost two data units */
+ /* *infoFunc = PQOneTwo; */
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
+ break;
+ }
+ return;
+ }
+ /* a write */
+ switch (ntfail) {
+ case 0: /* fault free */
+ if (rf_suppressLocksAndLargeWrites ||
+ (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
+ (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
+
+ *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
+ } else {
+ *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
+ }
+ break;
+
+ case 1: /* single disk fault */
+ if (npfail == 1) {
+ RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
+ if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
+ * normal mode raid5
+ * write. */
+ if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
+ || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
+ else
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
+ } else {/* parity died, small write only updating Q */
+ if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
+ || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
+ else
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
+ }
+ } else { /* data missing. Do a P reconstruct write if
+ * only a single data unit is lost in the
+ * stripe, otherwise a PQ reconstruct write. */
+ if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
+ else
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
+ }
+ break;
+
+ case 2: /* two disk faults */
+ switch (npfail) {
+ case 2: /* both p and q dead */
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
+ break;
+ case 1: /* either p or q and dead data */
+ RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
+ RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
+ if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
+ else
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
+ break;
+ case 0: /* double data loss */
+ *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
+ break;
+ }
+ break;
+
+ default: /* more than 2 disk faults */
+ *createFunc = NULL;
+ RF_PANIC();
+ }
+ return;
+}
+/*
+ Used as a stop gap info function
+*/
+#if 0
+static void
+PQOne(raidPtr, nSucc, nAnte, asmap)
+ RF_Raid_t *raidPtr;
+ int *nSucc;
+ int *nAnte;
+ RF_AccessStripeMap_t *asmap;
+{
+ *nSucc = *nAnte = 1;
+}
+
+static void
+PQOneTwo(raidPtr, nSucc, nAnte, asmap)
+ RF_Raid_t *raidPtr;
+ int *nSucc;
+ int *nAnte;
+ RF_AccessStripeMap_t *asmap;
+{
+ *nSucc = 1;
+ *nAnte = 2;
+}
+#endif
+
+RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
+{
+ rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
+ rf_RegularPQFunc, RF_FALSE);
+}
+
+int
+rf_RegularONQFunc(node)
+ RF_DagNode_t *node;
+{
+ int np = node->numParams;
+ int d;
+ RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
+ int i;
+ RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
+ RF_Etimer_t timer;
+ char *qbuf, *qpbuf;
+ char *obuf, *nbuf;
+ RF_PhysDiskAddr_t *old, *new;
+ unsigned long coeff;
+ unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
+
+ RF_ETIMER_START(timer);
+
+ d = (np - 3) / 4;
+ RF_ASSERT(4 * d + 3 == np);
+ qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
+ for (i = 0; i < d; i++) {
+ old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
+ obuf = (char *) node->params[2 * i + 1].p;
+ new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
+ nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
+ RF_ASSERT(new->numSector == old->numSector);
+ RF_ASSERT(new->raidAddress == old->raidAddress);
+ /* the stripe unit within the stripe tells us the coefficient
+ * to use for the multiply. */
+ coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
+ /* reduce the stripe unit ID modulo numDataCol; the "+ 1" is
+ * applied by QDelta when it indexes rf_rn[coeff + 1] */
+ coeff = (coeff % raidPtr->Layout.numDataCol);
+ qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
+ QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
+ }
+
+ RF_ETIMER_STOP(timer);
+ RF_ETIMER_EVAL(timer);
+ tracerec->q_us += RF_ETIMER_VAL_US(timer);
+ rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
+ * I/O in this node */
+ return (0);
+}
+/*
+ See the SimpleXORFunc for the difference between a simple and regular func.
+ These Q functions should be used for
+
+ new q = Q(data,old data,old q)
+
+ style updates and not for
+
+ q = ( new data, new data, .... )
+
+ computations.
+
+ The simple q takes 2(2d+1)+1 params, where d is the number
+ of stripes written. The order of params is
+ [0] old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_(d-1), old data buffer_(d-1)
+ [2d] old q pda_0, old q buffer
+ [2d+2] new data pda_0, new data buffer_0, ... new data pda_(d-1), new data buffer_(d-1)
+ raidPtr
+*/
+
+int
+rf_SimpleONQFunc(node)
+ RF_DagNode_t *node;
+{
+ int np = node->numParams;
+ int d;
+ RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
+ int i;
+ RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
+ RF_Etimer_t timer;
+ char *qbuf;
+ char *obuf, *nbuf;
+ RF_PhysDiskAddr_t *old, *new;
+ unsigned long coeff;
+
+ RF_ETIMER_START(timer);
+
+ d = (np - 3) / 4;
+ RF_ASSERT(4 * d + 3 == np);
+ qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
+ for (i = 0; i < d; i++) {
+ old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
+ obuf = (char *) node->params[2 * i + 1].p;
+ new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
+ nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
+ RF_ASSERT(new->numSector == old->numSector);
+ RF_ASSERT(new->raidAddress == old->raidAddress);
+ /* the stripe unit within the stripe tells us the coefficient
+ * to use for the multiply. */
+ coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
+ /* reduce the stripe unit ID modulo numDataCol; the "+ 1" is
+ * applied by QDelta when it indexes rf_rn[coeff + 1] */
+ coeff = (coeff % raidPtr->Layout.numDataCol);
+ QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
+ }
+
+ RF_ETIMER_STOP(timer);
+ RF_ETIMER_EVAL(timer);
+ tracerec->q_us += RF_ETIMER_VAL_US(timer);
+ rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
+ * I/O in this node */
+ return (0);
+}
+RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
+{
+ rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
+}
+
+static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
+
+static void
+RegularQSubr(node, qbuf)
+ RF_DagNode_t *node;
+ char *qbuf;
+{
+ int np = node->numParams;
+ int d;
+ RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
+ unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
+ int i;
+ RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
+ RF_Etimer_t timer;
+ char *obuf, *qpbuf;
+ RF_PhysDiskAddr_t *old;
+ unsigned long coeff;
+
+ RF_ETIMER_START(timer);
+
+ d = (np - 1) / 2;
+ RF_ASSERT(2 * d + 1 == np);
+ for (i = 0; i < d; i++) {
+ old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
+ obuf = (char *) node->params[2 * i + 1].p;
+ coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
+ /* reduce the stripe unit ID modulo numDataCol; the "+ 1" is
+ * applied by rf_IncQ when it indexes rf_rn[coeff + 1] */
+ coeff = (coeff % raidPtr->Layout.numDataCol);
+ /* the input buffers may not all be aligned with the start of
+ * the stripe. so shift by their sector offset within the
+ * stripe unit */
+ qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
+ rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
+ }
+
+ RF_ETIMER_STOP(timer);
+ RF_ETIMER_EVAL(timer);
+ tracerec->q_us += RF_ETIMER_VAL_US(timer);
+}
+/*
+ used in degraded writes.
+*/
+
+static void DegrQSubr(RF_DagNode_t *node);
+
+static void
+DegrQSubr(node)
+ RF_DagNode_t *node;
+{
+ int np = node->numParams;
+ int d;
+ RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
+ unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
+ int i;
+ RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
+ RF_Etimer_t timer;
+ char *qbuf = node->results[1];
+ char *obuf, *qpbuf;
+ RF_PhysDiskAddr_t *old;
+ unsigned long coeff;
+ unsigned fail_start;
+ int j;
+
+ old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
+ fail_start = old->startSector % secPerSU;
+
+ RF_ETIMER_START(timer);
+
+ d = (np - 2) / 2;
+ RF_ASSERT(2 * d + 2 == np);
+ for (i = 0; i < d; i++) {
+ old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
+ obuf = (char *) node->params[2 * i + 1].p;
+ coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
+ /* reduce the stripe unit ID modulo numDataCol; the "+ 1" is
+ * applied by rf_IncQ when it indexes rf_rn[coeff + 1] */
+ coeff = (coeff % raidPtr->Layout.numDataCol);
+ /* the input buffers may not all be aligned with the start of
+ * the stripe. so shift by their sector offset within the
+ * stripe unit */
+ j = old->startSector % secPerSU;
+ RF_ASSERT(j >= fail_start);
+ qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
+ rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
+ }
+
+ RF_ETIMER_STOP(timer);
+ RF_ETIMER_EVAL(timer);
+ tracerec->q_us += RF_ETIMER_VAL_US(timer);
+}
+/*
+ Called by large write code to compute the new parity and the new q.
+
+ structure of the params:
+
+ pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol )
+ raidPtr
+
+ for a total of 2d+1 arguments.
+ The result buffers results[0], results[1] are the buffers for the p and q,
+ respectively.
+
+ We compute Q first, then compute P. The P calculation may try to reuse
+ one of the input buffers for its output, so if we computed P first, we would
+ corrupt the input for the q calculation.
+*/
+
+int
+rf_RegularPQFunc(node)
+ RF_DagNode_t *node;
+{
+ RegularQSubr(node, node->results[1]);
+ return (rf_RegularXorFunc(node)); /* does the wakeup */
+}
+
+int
+rf_RegularQFunc(node)
+ RF_DagNode_t *node;
+{
+ /* Almost ... adjust Qsubr args */
+ RegularQSubr(node, node->results[0]);
+ rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
+ * I/O in this node */
+ return (0);
+}
+/*
+ Called by singly degraded write code to compute the new parity and the new q.
+
+ structure of the params:
+
+ pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
+ failedPDA raidPtr
+
+ for a total of 2d+2 arguments.
+ The result buffers results[0], results[1] are the buffers for the parity and q,
+ respectively.
+
+ We compute Q first, then compute parity. The parity calculation may try to reuse
+ one of the input buffers for its output, so if we computed parity first, we would
+ corrupt the input for the q calculation.
+
+ Unlike the regularPQ case, the failedPDA argument is used: Q data is placed relative to its start sector within the stripe unit.
+*/
+
+void
+rf_Degraded_100_PQFunc(node)
+ RF_DagNode_t *node;
+{
+ int np = node->numParams;
+
+ RF_ASSERT(np >= 2);
+ DegrQSubr(node);
+ rf_RecoveryXorFunc(node);
+}
+
+
+/*
+ The two below are used when reading a stripe with a single lost data unit.
+ The parameters are
+
+ pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
+
+ and results[0] contains the data buffer. Which is originally zero-filled.
+
+*/
+
+/* this Q func is used by the degraded-mode dag functions to recover lost data.
+ * the second-to-last parameter is the PDA for the failed portion of the access.
+ * the code here looks at this PDA and assumes that the xor target buffer is
+ * equal in size to the number of sectors in the failed PDA. It then uses
+ * the other PDAs in the parameter list to determine where within the target
+ * buffer the corresponding data should be xored.
+ *
+ * Recall the basic equation is
+ *
+ * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
+ *
+ * so to recover data_j we need
+ *
+ * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
+ *
+ * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
+ * copying Q (params[numParams - 3]) into it. Then a table lookup (rf_InvertQ) solves
+ * data_j /= J
+ *
+ * The inversion takes both its column and its length from the failed PDA, never a loop PDA.
+ */
+int
+rf_RecoveryQFunc(node)
+ RF_DagNode_t *node;
+{
+ RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
+ RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
+ RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
+ int i;
+ RF_PhysDiskAddr_t *pda;
+ RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
+ char *srcbuf, *destbuf;
+ RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
+ RF_Etimer_t timer;
+ unsigned long coeff;
+
+ RF_ETIMER_START(timer);
+ /* start by copying Q into the buffer */
+ bcopy(node->params[node->numParams - 3].p, node->results[0],
+ rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
+ for (i = 0; i < node->numParams - 4; i += 2) {
+ RF_ASSERT(node->params[i + 1].p != node->results[0]);
+ pda = (RF_PhysDiskAddr_t *) node->params[i].p;
+ srcbuf = (char *) node->params[i + 1].p;
+ suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
+ destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
+ coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
+ /* compute the data unit offset within the column */
+ coeff = (coeff % raidPtr->Layout.numDataCol);
+ rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
+ }
+ /* Do the nasty inversion now */
+ coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->raidAddress) % raidPtr->Layout.numDataCol);
+ rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector), coeff);
+ RF_ETIMER_STOP(timer);
+ RF_ETIMER_EVAL(timer);
+ tracerec->q_us += RF_ETIMER_VAL_US(timer);
+ rf_GenericWakeupFunc(node, 0);
+ return (0);
+}
+
+int
+rf_RecoveryPQFunc(node)
+ RF_DagNode_t *node;
+{
+ RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
+ printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
+ return (1);
+}
+/*
+ Degraded write Q subroutine.
+ Used when P is dead.
+ Large-write style Q computation.
+ Parameters
+
+ (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
+
+ The trailing failedPDA supplies the start sector that Q buffer offsets are measured from.
+
+ This is a "simple style" recovery func.
+*/
+
+void
+rf_PQ_DegradedWriteQFunc(node)
+ RF_DagNode_t *node;
+{
+ int np = node->numParams;
+ int d;
+ RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
+ unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
+ int i;
+ RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
+ RF_Etimer_t timer;
+ char *qbuf = node->results[0];
+ char *obuf, *qpbuf;
+ RF_PhysDiskAddr_t *old;
+ unsigned long coeff;
+ int fail_start, j;
+
+ old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
+ fail_start = old->startSector % secPerSU;
+
+ RF_ETIMER_START(timer);
+
+ d = (np - 2) / 2;
+ RF_ASSERT(2 * d + 2 == np);
+
+ for (i = 0; i < d; i++) {
+ old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
+ obuf = (char *) node->params[2 * i + 1].p;
+ coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
+ /* reduce the stripe unit ID modulo numDataCol; the "+ 1" is
+ * applied by rf_IncQ when it indexes rf_rn[coeff + 1] */
+ coeff = (coeff % raidPtr->Layout.numDataCol);
+ j = old->startSector % secPerSU;
+ RF_ASSERT(j >= fail_start);
+ qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
+ rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
+ }
+
+ RF_ETIMER_STOP(timer);
+ RF_ETIMER_EVAL(timer);
+ tracerec->q_us += RF_ETIMER_VAL_US(timer);
+ rf_GenericWakeupFunc(node, 0);
+}
+
+
+
+
+/* Q computations */
+
+/*
+ coeff - column;
+
+ compute dest ^= qfor[28-coeff][rn[coeff+1] ^ a]
+
+ on 5-bit basis, every quant goes through the qfor lookup;
+ length in bytes;
+*/
+
+void
+rf_IncQ(dest, buf, length, coeff)
+ unsigned long *dest;
+ unsigned long *buf;
+ unsigned length;
+ unsigned coeff;
+{
+ unsigned long a, d, new;
+ unsigned long a1, a2;
+ unsigned int *q = &(rf_qfor[28 - coeff][0]);
+ unsigned r = rf_rn[coeff + 1];
+
+#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
+#define INSERT(a,i) (a << (5L*i))
+
+ length /= 8;
+ /* 13 5 bit quants in a 64 bit word */
+ while (length) {
+ a = *buf++;
+ d = *dest;
+ a1 = EXTRACT(a, 0) ^ r;
+ a2 = EXTRACT(a, 1) ^ r;
+ new = INSERT((unsigned long) q[a2], 1) | q[a1];
+ a1 = EXTRACT(a, 2) ^ r;
+ a2 = EXTRACT(a, 3) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 2) | INSERT(a2, 3);
+ a1 = EXTRACT(a, 4) ^ r;
+ a2 = EXTRACT(a, 5) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 4) | INSERT(a2, 5);
+ a1 = EXTRACT(a, 5) ^ r;
+ a2 = EXTRACT(a, 6) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 5) | INSERT(a2, 6);
+#if RF_LONGSHIFT > 2
+ a1 = EXTRACT(a, 7) ^ r;
+ a2 = EXTRACT(a, 8) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 7) | INSERT(a2, 8);
+ a1 = EXTRACT(a, 9) ^ r;
+ a2 = EXTRACT(a, 10) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 9) | INSERT(a2, 10);
+ a1 = EXTRACT(a, 11) ^ r;
+ a2 = EXTRACT(a, 12) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 11) | INSERT(a2, 12);
+#endif /* RF_LONGSHIFT > 2 */
+ d ^= new;
+ *dest++ = d;
+ length--;
+ }
+}
+/*
+ compute
+
+ dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
+
+ on a five bit basis.
+ optimization: compute old ^ new on 64 bit basis.
+
+ length in bytes.
+*/
+
+static void
+QDelta(
+ char *dest,
+ char *obuf,
+ char *nbuf,
+ unsigned length,
+ unsigned char coeff)
+{
+ unsigned long a, d, new;
+ unsigned long a1, a2;
+ unsigned int *q = &(rf_qfor[28 - coeff][0]);
+ unsigned int r = rf_rn[coeff + 1];
+
+ r = a1 = a2 = new = d = a = 0; /* XXX for now... */
+ q = NULL; /* XXX for now */
+
+#ifdef _KERNEL
+ /* PQ in kernel currently not supported because the encoding/decoding
+ * table is not present */
+ bzero(dest, length);
+#else /* KERNEL */
+ /* this code probably doesn't work and should be rewritten -wvcii */
+ /* 13 5 bit quants in a 64 bit word */
+ length /= 8;
+ while (length) {
+ a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
+ a ^= *nbuf++;
+ d = *dest;
+ a1 = EXTRACT(a, 0) ^ r;
+ a2 = EXTRACT(a, 1) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = INSERT(a2, 1) | a1;
+ a1 = EXTRACT(a, 2) ^ r;
+ a2 = EXTRACT(a, 3) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 2) | INSERT(a2, 3);
+ a1 = EXTRACT(a, 4) ^ r;
+ a2 = EXTRACT(a, 5) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 4) | INSERT(a2, 5);
+ a1 = EXTRACT(a, 5) ^ r;
+ a2 = EXTRACT(a, 6) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 5) | INSERT(a2, 6);
+#if RF_LONGSHIFT > 2
+ a1 = EXTRACT(a, 7) ^ r;
+ a2 = EXTRACT(a, 8) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 7) | INSERT(a2, 8);
+ a1 = EXTRACT(a, 9) ^ r;
+ a2 = EXTRACT(a, 10) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 9) | INSERT(a2, 10);
+ a1 = EXTRACT(a, 11) ^ r;
+ a2 = EXTRACT(a, 12) ^ r;
+ a1 = q[a1];
+ a2 = q[a2];
+ new = new | INSERT(a1, 11) | INSERT(a2, 12);
+#endif /* RF_LONGSHIFT > 2 */
+ d ^= new;
+ *dest++ = d;
+ length--;
+ }
+#endif /* _KERNEL */
+}
+/*
+ recover columns a and b from the given p and q into
+ bufs abuf and bbuf. All bufs are word aligned.
+ Length is in bytes.
+*/
+
+
+/*
+ * XXX
+ *
+ * Everything about this seems wrong.
+ */
+void
+rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
+ unsigned long *pbuf;
+ unsigned long *qbuf;
+ unsigned long *abuf;
+ unsigned long *bbuf;
+ unsigned length;
+ unsigned coeff_a;
+ unsigned coeff_b;
+{
+ unsigned long p, q, a, a0, a1;
+ int col = (29 * coeff_a) + coeff_b;
+ unsigned char *q0 = &(rf_qinv[col][0]);
+
+ length /= 8;
+ while (length) {
+ p = *pbuf++;
+ q = *qbuf++;
+ a0 = EXTRACT(p, 0);
+ a1 = EXTRACT(q, 0);
+ a = q0[a0 << 5 | a1];
+#define MF(i) \
+ a0 = EXTRACT(p,i); \
+ a1 = EXTRACT(q,i); \
+ a = a | INSERT(q0[a0<<5 | a1],i)
+
+ MF(1);
+ MF(2);
+ MF(3);
+ MF(4);
+ MF(5);
+ MF(6);
+#if 0
+ MF(7);
+ MF(8);
+ MF(9);
+ MF(10);
+ MF(11);
+ MF(12);
+#endif /* 0 */
+ *abuf++ = a;
+ *bbuf++ = a ^ p;
+ length--;
+ }
+}
+/*
+ Lost parity and a data column. Recover that data column.
+ Assume col coeff is lost. Let q the contents of Q after
+ all surviving data columns have been q-xored out of it.
+ Then we have the equation
+
+ q[28-coeff][a_i ^ r_i+1] = q
+
+ but q is cyclic with period 31.
+ So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
+ q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
+
+ so a_i = r_{coeff+1} ^ q[3+coeff][q]
+
+ The routine is passed the q buffer and the buffer
+ the data is to be recovered into. They can be the same.
+*/
+
+
+
+static void
+rf_InvertQ(
+ unsigned long *qbuf,
+ unsigned long *abuf,
+ unsigned length,
+ unsigned coeff)
+{
+ unsigned long a, new;
+ unsigned long a1, a2;
+ unsigned int *q = &(rf_qfor[3 + coeff][0]);
+ unsigned r = rf_rn[coeff + 1];
+
+ /* 13 5 bit quants in a 64 bit word */
+ length /= 8;
+ while (length) {
+ a = *qbuf++;
+ a1 = EXTRACT(a, 0);
+ a2 = EXTRACT(a, 1);
+ a1 = r ^ q[a1];
+ a2 = r ^ q[a2];
+ new = INSERT(a2, 1) | a1;
+#define M(i,j) \
+ a1 = EXTRACT(a,i); \
+ a2 = EXTRACT(a,j); \
+ a1 = r ^ q[a1]; \
+ a2 = r ^ q[a2]; \
+ new = new | INSERT(a1,i) | INSERT(a2,j)
+
+ M(2, 3);
+ M(4, 5);
+ M(5, 6);
+#if RF_LONGSHIFT > 2
+ M(7, 8);
+ M(9, 10);
+ M(11, 12);
+#endif /* RF_LONGSHIFT > 2 */
+ *abuf++ = new;
+ length--;
+ }
+}
+#endif /* (RF_INCLUDE_DECL_PQ > 0) ||
+ * (RF_INCLUDE_RAID6 > 0) */
OpenPOWER on IntegriCloud