summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/md/raid5.c149
-rw-r--r--include/linux/raid/raid5.h2
2 files changed, 115 insertions, 36 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d9521aa..42439a4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2077,36 +2077,101 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
}
+/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
+ * to process
+ */
+static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
+ struct stripe_head_state *s, int disk_idx, int disks)
+{
+ struct r5dev *dev = &sh->dev[disk_idx];
+ struct r5dev *failed_dev = &sh->dev[s->failed_num];
+
+ /* don't schedule compute operations or reads on the parity block while
+ * a check is in flight
+ */
+ if ((disk_idx == sh->pd_idx) &&
+ test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
+ return ~0;
+
+ /* is the data in this block needed, and can we get it? */
+ if (!test_bit(R5_LOCKED, &dev->flags) &&
+ !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
+ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+ s->syncing || s->expanding || (s->failed &&
+ (failed_dev->toread || (failed_dev->towrite &&
+ !test_bit(R5_OVERWRITE, &failed_dev->flags)
+ ))))) {
+ /* 1/ We would like to get this block, possibly by computing it,
+ * but we might not be able to.
+ *
+ * 2/ Since parity check operations potentially make the parity
+ * block !uptodate it will need to be refreshed before any
+ * compute operations on data disks are scheduled.
+ *
+ * 3/ We hold off parity block re-reads until check operations
+ * have quiesced.
+ */
+ if ((s->uptodate == disks - 1) &&
+ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+ set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+ set_bit(R5_Wantcompute, &dev->flags);
+ sh->ops.target = disk_idx;
+ s->req_compute = 1;
+ sh->ops.count++;
+ /* Careful: from this point on 'uptodate' is in the eye
+ * of raid5_run_ops which services 'compute' operations
+ * before writes. R5_Wantcompute flags a block that will
+ * be R5_UPTODATE by the time it is needed for a
+ * subsequent operation.
+ */
+ s->uptodate++;
+ return 0; /* uptodate + compute == disks */
+ } else if ((s->uptodate < disks - 1) &&
+ test_bit(R5_Insync, &dev->flags)) {
+ /* Note: we hold off compute operations while checks are
+ * in flight, but we still prefer 'compute' over 'read'
+ * hence we only read if (uptodate < * disks-1)
+ */
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+ sh->ops.count++;
+ s->locked++;
+ pr_debug("Reading block %d (sync=%d)\n", disk_idx,
+ s->syncing);
+ }
+ }
+
+ return ~0;
+}
+
static void handle_issuing_new_read_requests5(struct stripe_head *sh,
struct stripe_head_state *s, int disks)
{
int i;
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
- (dev->toread ||
- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
- s->syncing || s->expanding ||
- (s->failed && (sh->dev[s->failed_num].toread ||
- (sh->dev[s->failed_num].towrite &&
- !test_bit(R5_OVERWRITE, &sh->dev[s->failed_num].flags))
- )))) {
- /* we would like to get this block, possibly
- * by computing it, but we might not be able to
- */
- if (s->uptodate == disks-1) {
- pr_debug("Computing block %d\n", i);
- compute_block(sh, i);
- s->uptodate++;
- } else if (test_bit(R5_Insync, &dev->flags)) {
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantread, &dev->flags);
- s->locked++;
- pr_debug("Reading block %d (sync=%d)\n",
- i, s->syncing);
- }
- }
+
+ /* Clear completed compute operations. Parity recovery
+ * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
+ * later on in this routine
+ */
+ if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
+ !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+ }
+
+ /* look for blocks to read/compute, skip this if a compute
+ * is already in flight, or if the stripe contents are in the
+ * midst of changing due to a write
+ */
+ if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
+ !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
+ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+ for (i = disks; i--; )
+ if (__handle_issuing_new_read_requests5(
+ sh, s, i, disks) == 0)
+ break;
}
set_bit(STRIPE_HANDLE, &sh->state);
}
@@ -2223,7 +2288,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || i == sh->pd_idx) &&
!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags)) {
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags))) {
if (test_bit(R5_Insync, &dev->flags))
rmw++;
else
@@ -2232,9 +2298,9 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
/* Would I have to read this buffer for reconstruct_write */
if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags)) {
- if (test_bit(R5_Insync, &dev->flags))
- rcw++;
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags))) {
+ if (test_bit(R5_Insync, &dev->flags)) rcw++;
else
rcw += 2*disks;
}
@@ -2248,7 +2314,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || i == sh->pd_idx) &&
!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags)) &&
test_bit(R5_Insync, &dev->flags)) {
if (
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
@@ -2270,7 +2337,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
if (!test_bit(R5_OVERWRITE, &dev->flags) &&
i != sh->pd_idx &&
!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags)) &&
test_bit(R5_Insync, &dev->flags)) {
if (
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
@@ -2288,8 +2356,17 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
/* now if nothing is locked, and if we have enough data,
* we can start a write request
*/
- if (s->locked == 0 && (rcw == 0 || rmw == 0) &&
- !test_bit(STRIPE_BIT_DELAY, &sh->state))
+ /* since handle_stripe can be called at any time we need to handle the
+ * case where a compute block operation has been submitted and then a
+ * subsequent call wants to start a write request. raid5_run_ops only
+ * handles the case where compute block and postxor are requested
+ * simultaneously. If this is not the case then new writes need to be
+ * held off until the compute completes.
+ */
+ if ((s->req_compute ||
+ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
+ (s->locked == 0 && (rcw == 0 || rmw == 0) &&
+ !test_bit(STRIPE_BIT_DELAY, &sh->state)))
s->locked += handle_write_operations5(sh, rcw == 0, 0);
}
@@ -2650,6 +2727,7 @@ static void handle_stripe5(struct stripe_head *sh)
/* now count some things */
if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+ if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
if (dev->toread)
s.to_read++;
@@ -2706,7 +2784,8 @@ static void handle_stripe5(struct stripe_head *sh)
* or to load a block that is being partially written.
*/
if (s.to_read || s.non_overwrite ||
- (s.syncing && (s.uptodate < disks)) || s.expanding)
+ (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding ||
+ test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
handle_issuing_new_read_requests5(sh, &s, disks);
/* Now we check to see if any write operations have recently
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 6fb9d94..2293015 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -200,7 +200,7 @@ struct stripe_head {
struct stripe_head_state {
int syncing, expanding, expanded;
int locked, uptodate, to_read, to_write, failed, written;
- int non_overwrite;
+ int compute, req_compute, non_overwrite;
int failed_num;
};
OpenPOWER on IntegriCloud