From 8a03ae2a5baed3df09e5643615bdd853fc142a09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Jan 2010 20:39:07 +0000 Subject: block: drbd: Convert semaphore to mutex The bm_change semaphore is semantically a mutex. Convert it to a real mutex. Signed-off-by: Thomas Gleixner Signed-off-by: Philipp Reisner --- drivers/block/drbd/drbd_bitmap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index b61057e..f58e765 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -66,7 +66,7 @@ struct drbd_bitmap { size_t bm_words; size_t bm_number_of_pages; sector_t bm_dev_capacity; - struct semaphore bm_change; /* serializes resize operations */ + struct mutex bm_change; /* serializes resize operations */ atomic_t bm_async_io; wait_queue_head_t bm_io_wait; @@ -114,7 +114,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) return; } - trylock_failed = down_trylock(&b->bm_change); + trylock_failed = !mutex_trylock(&b->bm_change); if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", @@ -125,7 +125,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) b->bm_task == mdev->receiver.task ? "receiver" : b->bm_task == mdev->asender.task ? "asender" : b->bm_task == mdev->worker.task ? 
"worker" : "?"); - down(&b->bm_change); + mutex_lock(&b->bm_change); } if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); @@ -147,7 +147,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) b->bm_why = NULL; b->bm_task = NULL; - up(&b->bm_change); + mutex_unlock(&b->bm_change); } /* word offset to long pointer */ @@ -295,7 +295,7 @@ int drbd_bm_init(struct drbd_conf *mdev) if (!b) return -ENOMEM; spin_lock_init(&b->bm_lock); - init_MUTEX(&b->bm_change); + mutex_init(&b->bm_change); init_waitqueue_head(&b->bm_io_wait); mdev->bitmap = b; -- cgit v1.1 From cf14c2e987ba0a09a7b09be2ecd55af0bc9c17b4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 2 Feb 2010 21:03:50 +0100 Subject: drbd: --dry-run option for drbdsetup net ( drbdadm -- --dry-run connect ) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++++++- drivers/block/drbd/drbd_main.c | 16 ++++++++++++++-- drivers/block/drbd/drbd_receiver.c | 22 ++++++++++++++++++++-- 3 files changed, 41 insertions(+), 5 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2bf3a6e..1aae724 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -443,13 +443,18 @@ struct p_rs_param_89 { char csums_alg[SHARED_SECRET_MAX]; } __packed; +enum drbd_conn_flags { + CF_WANT_LOSE = 1, + CF_DRY_RUN = 2, +}; + struct p_protocol { struct p_header head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; u32 after_sb_2p; - u32 want_lose; + u32 conn_flags; u32 two_primaries; /* Since protocol version 87 and higher. */ @@ -791,6 +796,7 @@ enum { * while this is set. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ + CONN_DRY_RUN, /* Expect disconnect after resync handshake. 
*/ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ab871e0..b2d347d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1668,7 +1668,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) int drbd_send_protocol(struct drbd_conf *mdev) { struct p_protocol *p; - int size, rv; + int size, cf, rv; size = sizeof(struct p_protocol); @@ -1685,9 +1685,21 @@ int drbd_send_protocol(struct drbd_conf *mdev) p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); - p->want_lose = cpu_to_be32(mdev->net_conf->want_lose); p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); + cf = 0; + if (mdev->net_conf->want_lose) + cf |= CF_WANT_LOSE; + if (mdev->net_conf->dry_run) { + if (mdev->agreed_pro_version >= 92) + cf |= CF_DRY_RUN; + else { + dev_err(DEV, "--dry-run is not supported by peer"); + return 0; + } + } + p->conn_flags = cpu_to_be32(cf); + if (mdev->agreed_pro_version >= 87) strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d065c64..8bcde4a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2538,6 +2538,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } + if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { + if (hg == 0) + dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); + else + dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.", + drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET), + abs(hg) >= 2 ? 
"full" : "bit-map based"); + return C_MASK; + } + if (abs(hg) >= 2) { dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) @@ -2585,7 +2595,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_want_lose, p_two_primaries; + int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; header_size = sizeof(*p) - sizeof(*h); @@ -2598,8 +2608,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) p_after_sb_0p = be32_to_cpu(p->after_sb_0p); p_after_sb_1p = be32_to_cpu(p->after_sb_1p); p_after_sb_2p = be32_to_cpu(p->after_sb_2p); - p_want_lose = be32_to_cpu(p->want_lose); p_two_primaries = be32_to_cpu(p->two_primaries); + cf = be32_to_cpu(p->conn_flags); + p_want_lose = cf & CF_WANT_LOSE; + + clear_bit(CONN_DRY_RUN, &mdev->flags); + + if (cf & CF_DRY_RUN) + set_bit(CONN_DRY_RUN, &mdev->flags); if (p_proto != mdev->net_conf->wire_protocol) { dev_err(DEV, "incompatible communication protocols\n"); @@ -3125,6 +3141,8 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; } else { + if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) + return FALSE; D_ASSERT(oconn == C_WF_REPORT_PARAMS); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; -- cgit v1.1 From 4aa83b7bf122106669346eef40632289f540653f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 26 Feb 2010 16:53:24 +0100 Subject: drbd: fix NULL pointer dereference on 4k hard sect size we still don't support 4k 'physical' sectors 'natively', but use a read-modify-write workaround. 
And we even tried to use the extra page before we allocated it :( Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4df3b40..d53d36c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -941,6 +941,25 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_set_sector_offsets(mdev, nbc); + /* allocate a second IO page if logical_block_size != 512 */ + logical_block_size = bdev_logical_block_size(nbc->md_bdev); + if (logical_block_size == 0) + logical_block_size = MD_SECTOR_SIZE; + + if (logical_block_size != MD_SECTOR_SIZE) { + if (!mdev->md_io_tmpp) { + struct page *page = alloc_page(GFP_NOIO); + if (!page) + goto force_diskless_dec; + + dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", + logical_block_size, MD_SECTOR_SIZE); + dev_warn(DEV, "Workaround engaged (has performance impact).\n"); + + mdev->md_io_tmpp = page; + } + } + if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = ERR_NOMEM; @@ -980,25 +999,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto force_diskless_dec; } - /* allocate a second IO page if logical_block_size != 512 */ - logical_block_size = bdev_logical_block_size(nbc->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - if (logical_block_size != MD_SECTOR_SIZE) { - if (!mdev->md_io_tmpp) { - struct page *page = alloc_page(GFP_NOIO); - if (!page) - goto force_diskless_dec; - - dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", - logical_block_size, MD_SECTOR_SIZE); - dev_warn(DEV, "Workaround engaged (has performance impact).\n"); - - mdev->md_io_tmpp = page; - } - } - /* Reset the "barriers don't work" bits here, then force meta 
data to * be written, to ensure we determine if barriers are supported. */ if (nbc->dc.no_md_flush) -- cgit v1.1 From 580b9767dbdf2c049c4d05330c70ea786ef01016 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 26 Feb 2010 23:15:23 +0100 Subject: drbd: fix broken state change after split-brain attach while connected Situation: we have diverging data sets, i.e. we had a split brain somewhen, but currently are connected, one node diskless. Then we try to attach that disk, figure it is consistent, but has a diverging data set, we refuse to attach. This led to strange state changes: 22:18:35 bb drbd1: peer( Unknown -> Primary ) conn( WFReportParams -> Connected) pdsk( DUnknown -> UpToDate ) 22:19:30 bb drbd1: disk( Diskless -> Attaching ) 22:19:30 bb drbd1: disk( Attaching -> Negotiating ) 22:19:30 bb drbd1: drbd_sync_handshake: 22:19:30 bb drbd1: self 97BF25798B9D5222:F33D1F62ADE698DD:4269796F9D027C83:AC45D8B5C3C1BF93 bits:19449 flags:0 22:19:30 bb drbd1: peer 280DFB6E125465D3:F33D1F62ADE698DC:4269796F9D027C82:AC45D8B5C3C1BF93 bits:2575806 flags:0 22:19:30 bb drbd1: uuid_compare()=100 by rule 90 22:19:30 bb drbd1: Split-Brain detected, dropping connection! 22:19:30 bb drbd1: disk( Negotiating -> Diskless ) while the other side says: 22:19:30 aa drbd1: Split-Brain detected, dropping connection! 22:19:30 aa drbd1: Disk attach process on the peer node was aborted. 22:19:30 aa drbd1: conn( Connected -> TOO_LARGE ) pdsk( Diskless -> Consistent ) This should be fixed now. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8bcde4a..41f36a9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2513,6 +2513,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { + /* FIXME this log message is not correct if we end up here + * after an attempted attach on a diskless node. + * We just refuse to attach -- well, we drop the "connection" + * to that disk, in a way... */ dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); drbd_khelper(mdev, "split-brain"); return C_MASK; @@ -3134,12 +3138,13 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) put_ldev(mdev); if (nconn == C_MASK) { + nconn = C_CONNECTED; if (mdev->state.disk == D_NEGOTIATING) { drbd_force_state(mdev, NS(disk, D_DISKLESS)); - nconn = C_CONNECTED; } else if (peer_state.disk == D_NEGOTIATING) { dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; + real_peer_disk = D_DISKLESS; } else { if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) return FALSE; -- cgit v1.1 From 676396d545350a70d922605ec23c2ed26124334a Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 3 Mar 2010 02:08:22 +0100 Subject: fix unit of rs_same_csums accounting Depending on resync request size, we need to account for more than one bit. Impact: cosmetic If SyncTarget reported correctly 100% equal checksums, the SyncSource usually reported 12% equal checksums instead, because it only counted requests, we typically do 32k resync requests, and the bitmap granularity is still 4k. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b453c2b..d97a811 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -938,7 +938,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (eq) { drbd_set_in_sync(mdev, e->sector, e->size); - mdev->rs_same_csum++; + /* rs_same_csums unit is BM_BLOCK_SIZE */ + mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT; ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); } else { inc_rs_pending(mdev); -- cgit v1.1 From 4589d7f829951c1713ef5a4ad1a9bb563da329b5 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 3 Mar 2010 02:25:33 +0100 Subject: drbd_disconnect: grab meta.socket mutex as well Fixes a race and potential kernel panic if e.g. the worker was just about to send a few P_RS_IS_IN_SYNC via the meta socket for checksum based resync, while the receiver destroys the sockets in drbd_disconnect. To make sure no-one is using the meta socket, it is not enough to stop the asender... Grab the meta socket mutex before destroying it. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++++ drivers/block/drbd/drbd_receiver.c | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b2d347d..67e0fc5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3173,14 +3173,18 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) void drbd_free_sock(struct drbd_conf *mdev) { if (mdev->data.socket) { + mutex_lock(&mdev->data.mutex); kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); sock_release(mdev->data.socket); mdev->data.socket = NULL; + mutex_unlock(&mdev->data.mutex); } if (mdev->meta.socket) { + mutex_lock(&mdev->meta.mutex); kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); sock_release(mdev->meta.socket); mdev->meta.socket = NULL; + mutex_unlock(&mdev->meta.mutex); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 41f36a9..d803e6c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3617,10 +3617,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* asender does not clean up anything. it must not interfere, either */ drbd_thread_stop(&mdev->asender); - - mutex_lock(&mdev->data.mutex); drbd_free_sock(mdev); - mutex_unlock(&mdev->data.mutex); spin_lock_irq(&mdev->req_lock); _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); -- cgit v1.1 From c42b6cf4b38c9726d4b46c48d04197c9ca74d773 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 3 Mar 2010 02:44:11 +0100 Subject: drbd: add missing drbd command names to avoid in error messages cmdname() should map command number to its human readable representation. The string table was incomplete, though. Maybe rather do a switch() block, and let the compiler help us to keep it complete? 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1aae724..844206c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -261,6 +261,9 @@ static inline const char *cmdname(enum drbd_packets cmd) [P_OV_REQUEST] = "OVRequest", [P_OV_REPLY] = "OVReply", [P_OV_RESULT] = "OVResult", + [P_CSUM_RS_REQUEST] = "CsumRSRequest", + [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", + [P_COMPRESSED_BITMAP] = "CBitmap", [P_MAX_CMD] = NULL, }; -- cgit v1.1 From 309d1608cce32903d67d47e7545e232c400b6aa0 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 2 Mar 2010 15:03:44 +0100 Subject: drbd: Reduce the time an empty resync takes usually This mitigates changes introduced with commit: http://git.drbd.org/?p=drbd-8.3.git;a=commit;h=4b6803a3276652da3737 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 2 ++ drivers/block/drbd/drbd_worker.c | 12 +++++++++--- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 844206c..2d5cebb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -800,6 +800,7 @@ enum { RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. 
*/ + GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d803e6c..ed9f1de 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4074,6 +4074,8 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) + wake_up(&mdev->misc_wait); return TRUE; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d97a811..4672f2f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1289,6 +1289,14 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) return retcode; } +static void ping_peer(struct drbd_conf *mdev) +{ + clear_bit(GOT_PING_ACK, &mdev->flags); + request_ping(mdev); + wait_event(mdev->misc_wait, + test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); +} + /** * drbd_start_resync() - Start the resync process * @mdev: DRBD device. @@ -1383,9 +1391,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) if (mdev->rs_total == 0) { /* Peer still reachable? Beware of failing before-resync-target handlers! */ - request_ping(mdev); - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */ + ping_peer(mdev); drbd_resync_finished(mdev); return; } -- cgit v1.1 From d0c3f60f3611ceac9b1e4fdffd1497337568e7cb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 2 Mar 2010 15:06:45 +0100 Subject: drbd: Make sure we do not send state updates during an empty resync [Bugz 271] This is a race condition that existed for ages. The previous commit reduces the window, this one closes it. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4672f2f..44bf6d1 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1380,7 +1380,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) _drbd_pause_after(mdev); } write_unlock_irq(&global_state_lock); - drbd_state_unlock(mdev); put_ldev(mdev); if (r == SS_SUCCESS) { @@ -1393,7 +1392,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) /* Peer still reachable? Beware of failing before-resync-target handlers! */ ping_peer(mdev); drbd_resync_finished(mdev); - return; } /* ns.conn may already be != mdev->state.conn, @@ -1405,6 +1403,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_md_sync(mdev); } + drbd_state_unlock(mdev); } int drbd_worker(struct drbd_thread *thi) -- cgit v1.1 From d10a33c68b8526d95ef6ee72b371c392d48df4d3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 4 Mar 2010 15:11:39 +0100 Subject: drbd: Forcing primary should also work for Consistent disks [Bugz 266] Up to now this only worked for Outdated and Inconsistent disks; that it did not work for Consistent disks was an inconsistent omission.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d53d36c..6492e32 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -285,8 +285,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } if (r == SS_NO_UP_TO_DATE_DISK && force && - (mdev->state.disk == D_INCONSISTENT || - mdev->state.disk == D_OUTDATED)) { + (mdev->state.disk < D_UP_TO_DATE && + mdev->state.disk >= D_INCONSISTENT)) { mask.disk = D_MASK; val.disk = D_UP_TO_DATE; forced = 1; -- cgit v1.1 From 1f55243024087b56aef0b1e6d9c0ea89c76f0a6b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 4 Mar 2010 15:51:01 +0100 Subject: drbd: Renamed overwrite_peer to primary_force Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 6492e32..6429d2b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -407,7 +407,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } reply->ret_code = - drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer); + drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force); return 0; } -- cgit v1.1 From 39ad2bbb5900d1bc9ae8f06cebb4cb2529d9e42e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 4 Mar 2010 15:52:30 +0100 Subject: drbd: fix al-to-on-disk-bitmap for 4k logical_block_size Up to now, applying the in-core activity-log to the on-disk bitmap did not care for logical_block_size. On logical_block_size != 512 byte, this very likely results in misaligned block access and spurious "io errors".
We now simply always submit aligned whole 4k blocks, fixing this for logical block sizes of 512, 1024, 2048 and 4096. For even larger logical block sizes, this won't work. But I'm not aware of devices with such properties being available. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 17956ff..43e57f39 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -536,7 +536,9 @@ static void atodb_endio(struct bio *bio, int error) put_ldev(mdev); } +/* sector to word */ #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) + /* activity log to on disk bitmap -- prepare bio unless that sector * is already covered by previously prepared bios */ static int atodb_prepare_unless_covered(struct drbd_conf *mdev, @@ -546,13 +548,20 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev, { struct bio *bio; struct page *page; - sector_t on_disk_sector = enr + mdev->ldev->md.md_offset - + mdev->ldev->md.bm_offset; + sector_t on_disk_sector; unsigned int page_offset = PAGE_SIZE; int offset; int i = 0; int err = -ENOMEM; + /* We always write aligned, full 4k blocks, + * so we can ignore the logical_block_size (for now) */ + enr &= ~7U; + on_disk_sector = enr + mdev->ldev->md.md_offset + + mdev->ldev->md.bm_offset; + + D_ASSERT(!(on_disk_sector & 7U)); + /* Check if that enr is already covered by an already created bio. * Caution, bios[] is not NULL terminated, * but only initialized to all NULL. 
@@ -588,7 +597,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev, offset = S2W(enr); drbd_bm_get_lel(mdev, offset, - min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset), + min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset), kmap(page) + page_offset); kunmap(page); @@ -597,7 +606,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev, bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; - if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE) + if (bio_add_page(bio, page, 4096, page_offset) != 4096) goto out_put_page; atomic_inc(&wc->count); -- cgit v1.1 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the following. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and tries to put the new include such that its order conforms to its surrounding.
It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have a fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually widely available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build tests were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- drivers/block/drbd/drbd_bitmap.c | 1 + drivers/block/drbd/drbd_proc.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index b61057e..3d6f3d9 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "drbd_int.h" diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index df8ad96..be3374b 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include -- cgit v1.1 From b2b163dd47024e445410b72d0c5df6d819c14dfd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 2 Apr 2010 08:40:33 +0200 Subject: drbd: lc_element_by_index() never returns NULL Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 43e57f39..df01899 100644 ---
a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -1336,7 +1336,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) /* ok, ->resync is there. */ for (i = 0; i < mdev->resync->nr_elements; i++) { e = lc_element_by_index(mdev->resync, i); - bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + bm_ext = lc_entry(e, struct bm_extent, lce); if (bm_ext->lce.lc_number == LC_FREE) continue; if (bm_ext->lce.lc_number == mdev->resync_wenr) { -- cgit v1.1