diff options
Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 165 |
1 files changed, 99 insertions, 66 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f761d98..9e91a25 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -59,17 +59,19 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) { const unsigned long s = req->rq_state; + + /* remove it from the transfer log. + * well, only if it had been there in the first + * place... if it had not (local only or conflicting + * and never sent), it should still be "empty" as + * initialized in drbd_req_new(), so we can list_del() it + * here unconditionally */ + list_del(&req->tl_requests); + /* if it was a write, we may have to set the corresponding * bit(s) out-of-sync first. If it had a local part, we need to * release the reference to the activity log. */ if (rw == WRITE) { - /* remove it from the transfer log. - * well, only if it had been there in the first - * place... if it had not (local only or conflicting - * and never sent), it should still be "empty" as - * initialized in drbd_req_new(), so we can list_del() it - * here unconditionally */ - list_del(&req->tl_requests); /* Set out-of-sync unless both OK flags are set * (local only or remote failed). * Other places where we set out-of-sync: @@ -92,7 +94,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const */ if (s & RQ_LOCAL_MASK) { if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_al_complete_io(mdev, req->sector); + if (s & RQ_IN_ACT_LOG) + drbd_al_complete_io(mdev, req->sector); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " @@ -280,6 +283,14 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * protocol A or B, barrier ack still pending... */ } +static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m) +{ + struct drbd_conf *mdev = req->mdev; + + if (!is_susp(mdev->state)) + _req_may_be_done(req, m); +} + /* * checks whether there was an overlapping request * or ee already registered. @@ -380,10 +391,11 @@ out_conflict: * and it enforces that we have to think in a very structured manner * about the "events" that may happen to a request during its life time ... */ -void __req_mod(struct drbd_request *req, enum drbd_req_event what, +int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { struct drbd_conf *mdev = req->mdev; + int rv = 0; m->bio = NULL; switch (what) { @@ -420,7 +432,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); req->rq_state &= ~RQ_LOCAL_PENDING; - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; @@ -429,7 +441,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_LOCAL_PENDING; __drbd_chk_io_error(mdev, FALSE); - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; @@ -437,7 +449,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, /* it is legal to fail READA */ req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; @@ -455,7 +467,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; } @@ -517,11 +529,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); req->epoch = mdev->newest_tle->br_number; - list_add_tail(&req->tl_requests, - &mdev->newest_tle->requests); /* increment size of current epoch */ - mdev->newest_tle->n_req++; + mdev->newest_tle->n_writes++; /* queue work item to send data */ D_ASSERT(req->rq_state & RQ_NET_PENDING); @@ -530,7 +540,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size) + if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size) queue_barrier(mdev); break; @@ -543,7 +553,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_NET_QUEUED; /* if we did it right, tl_clear should be scheduled only after * this, so this should not be necessary! */ - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; case handed_over_to_network: @@ -568,7 +578,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, * "completed_ok" events came in, once we return from * _drbd_send_zc_bio (drbd_send_dblock), we have to check * whether it is done already, and end it. */ - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; case read_retry_remote_canceled: @@ -584,7 +594,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, /* if it is still queued, we may not complete it here. * it will be canceled soon. */ if (!(req->rq_state & RQ_NET_QUEUED)) - _req_may_be_done(req, m); + _req_may_be_done(req, m); /* Allowed while state.susp */ break; case write_acked_by_peer_and_sis: @@ -619,7 +629,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; case neg_acked: @@ -629,11 +639,50 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); /* else: done by handed_over_to_network */ break; + case fail_frozen_disk_io: + if (!(req->rq_state & RQ_LOCAL_COMPLETED)) + break; + + _req_may_be_done(req, m); /* Allowed while state.susp */ + break; + + case restart_frozen_disk_io: + if (!(req->rq_state & RQ_LOCAL_COMPLETED)) + break; + + req->rq_state &= ~RQ_LOCAL_COMPLETED; + + rv = MR_READ; + if (bio_data_dir(req->master_bio) == WRITE) + rv = MR_WRITE; + + get_ldev(mdev); + req->w.cb = w_restart_disk_io; + drbd_queue_work(&mdev->data.work, &req->w); + break; + + case resend: + /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK + before the connection loss (B&C only); only P_BARRIER_ACK was missing. + Trowing them out of the TL here by pretending we got a BARRIER_ACK + We ensure that the peer was not rebooted */ + if (!(req->rq_state & RQ_NET_OK)) { + if (req->w.cb) { + drbd_queue_work(&mdev->data.work, &req->w); + rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; + } + break; + } + /* else, fall through to barrier_acked */ + case barrier_acked: + if (!(req->rq_state & RQ_WRITE)) + break; + if (req->rq_state & RQ_NET_PENDING) { /* barrier came in before all requests have been acked. * this is bad, because if the connection is lost now, @@ -643,7 +692,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, } D_ASSERT(req->rq_state & RQ_NET_SENT); req->rq_state |= RQ_NET_DONE; - _req_may_be_done(req, m); + _req_may_be_done(req, m); /* Allowed while state.susp */ break; case data_received: @@ -651,9 +700,11 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; }; + + return rv; } /* we may do a local read if: @@ -752,14 +803,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) * resync extent to finish, and, if necessary, pulls in the target * extent into the activity log, which involves further disk io because * of transactional on-disk meta data updates. */ - if (rw == WRITE && local) + if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) { + req->rq_state |= RQ_IN_ACT_LOG; drbd_al_begin_io(mdev, sector); + } remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || (mdev->state.pdsk == D_INCONSISTENT && mdev->state.conn >= C_CONNECTED)); - if (!(local || remote) && !mdev->state.susp) { + if (!(local || remote) && !is_susp(mdev->state)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); goto fail_free_complete; } @@ -785,7 +838,7 @@ allocate_barrier: /* GOOD, everything prepared, grab the spin_lock */ spin_lock_irq(&mdev->req_lock); - if (mdev->state.susp) { + if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of generic_make_request() to restart processing of this bio. In the next call to drbd_make_request_26 @@ -867,30 +920,10 @@ allocate_barrier: /* check this request on the collision detection hash tables. * if we have a conflict, just complete it here. * THINK do we want to check reads, too? (I don't think so...) */ - if (rw == WRITE && _req_conflicts(req)) { - /* this is a conflicting request. - * even though it may have been only _partially_ - * overlapping with one of the currently pending requests, - * without even submitting or sending it, we will - * pretend that it was successfully served right now. - */ - if (local) { - bio_put(req->private_bio); - req->private_bio = NULL; - drbd_al_complete_io(mdev, req->sector); - put_ldev(mdev); - local = 0; - } - if (remote) - dec_ap_pending(mdev); - _drbd_end_io_acct(mdev, req); - /* THINK: do we want to fail it (-EIO), or pretend success? */ - bio_endio(req->master_bio, 0); - req->master_bio = NULL; - dec_ap_bio(mdev); - drbd_req_free(req); - remote = 0; - } + if (rw == WRITE && _req_conflicts(req)) + goto fail_conflicting; + + list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); /* NOTE remote first: to get the concurrent write detection right, * we must register the request before start of local IO. */ @@ -923,6 +956,21 @@ allocate_barrier: return 0; +fail_conflicting: + /* this is a conflicting request. + * even though it may have been only _partially_ + * overlapping with one of the currently pending requests, + * without even submitting or sending it, we will + * pretend that it was successfully served right now. + */ + _drbd_end_io_acct(mdev, req); + spin_unlock_irq(&mdev->req_lock); + if (remote) + dec_ap_pending(mdev); + /* THINK: do we want to fail it (-EIO), or pretend success? + * this pretends success. */ + err = 0; + fail_free_complete: if (rw == WRITE && local) drbd_al_complete_io(mdev, sector); @@ -961,21 +1009,6 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) return 1; } - /* - * Paranoia: we might have been primary, but sync target, or - * even diskless, then lost the connection. - * This should have been handled (panic? suspend?) somewhere - * else. But maybe it was not, so check again here. - * Caution: as long as we do not have a read/write lock on mdev, - * to serialize state changes, this is racy, since we may lose - * the connection *after* we test for the cstate. - */ - if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) { - if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Sorry, I have no access to good data anymore.\n"); - return 1; - } - return 0; } |