summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2011-11-17 10:11:47 +0100
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-11-08 16:58:09 +0100
commit975b297947c85a1cb687d7561b6fc05d48160026 (patch)
treee72ba4a70a1d532c5eff3764f4481485313721a6
parent77fede5137574813e415a4cf23038b6688ba2470 (diff)
downloadop-kernel-dev-975b297947c85a1cb687d7561b6fc05d48160026.zip
op-kernel-dev-975b297947c85a1cb687d7561b6fc05d48160026.tar.gz
drbd: fix potential spinlock deadlock
drbd_try_clear_on_disk_bm() has a sanity check for the number of blocks left to be resynced (rs_left) in the current resync extent. If it detects a mismatch, it complains, and forces a disconnect using drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); Unfortunately, this may be called while holding the req_lock, and drbd_force_state() want's to aquire that lock itself. Deadlock. Don't force a disconnect, but fix up rs_left by recounting and reassigning the number of dirty blocks in that extent. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r--drivers/block/drbd/drbd_actlog.c20
1 files changed, 12 insertions, 8 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index ade79be..d69fb7d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -573,16 +573,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
else
ext->rs_failed += count;
if (ext->rs_left < ext->rs_failed) {
- dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
- "rs_failed=%d count=%d\n",
+ dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
+ "rs_failed=%d count=%d cstate=%s\n",
(unsigned long long)sector,
ext->lce.lc_number, ext->rs_left,
- ext->rs_failed, count);
- dump_stack();
-
- lc_put(mdev->resync, &ext->lce);
- conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
- return;
+ ext->rs_failed, count,
+ drbd_conn_str(mdev->state.conn));
+
+ /* We don't expect to be able to clear more bits
+ * than have been set when we originally counted
+ * the set bits to cache that value in ext->rs_left.
+ * Whatever the reason (disconnect during resync,
+ * delayed local completion of an application write),
+ * try to fix it up by recounting here. */
+ ext->rs_left = drbd_bm_e_weight(mdev, enr);
}
} else {
/* Normally this element should be in the cache,
OpenPOWER on IntegriCloud