From cd9d9f5df6098c50726200d4185e9e8da32785b3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 4 Apr 2012 13:35:44 -0500 Subject: rbd: don't hold spinlock during messenger flush A recent change made changes to the rbd_client_list be protected by a spinlock. Unfortunately in rbd_put_client(), the lock is taken before possibly dropping the last reference to an rbd_client, and on the last reference that eventually calls flush_workqueue() which can sleep. The problem was flagged by a debug spinlock warning: BUG: spinlock wrong CPU on CPU#3, rbd/27814 The solution is to move the spinlock acquisition and release inside rbd_client_release(), which is the spot where it's really needed for protecting the removal of the rbd_client from the client list. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- drivers/block/rbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 013c7a5..c1f7701 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -450,7 +450,9 @@ static void rbd_client_release(struct kref *kref) struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); dout("rbd_release_client %p\n", rbdc); + spin_lock(&rbd_client_list_lock); list_del(&rbdc->node); + spin_unlock(&rbd_client_list_lock); ceph_destroy_client(rbdc->client); kfree(rbdc->rbd_opts); @@ -463,9 +465,7 @@ static void rbd_client_release(struct kref *kref) */ static void rbd_put_client(struct rbd_device *rbd_dev) { - spin_lock(&rbd_client_list_lock); kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - spin_unlock(&rbd_client_list_lock); rbd_dev->rbd_client = NULL; } -- cgit v1.1 From 3469ac1aa3a2f1e2586a412923c414779a0af854 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 7 May 2012 15:33:36 -0700 Subject: ceph: drop support for preferred_osd pgs This was an ill-conceived feature that has been removed from Ceph. Do this gracefully: - reject attempts to specify a preferred_osd via the ioctl - stop exposing this information via virtual xattrs - always fill in -1 for requests, in case we talk to an older server - don't calculate preferred_osd placements/pgids Reviewed-by: Alex Elder Signed-off-by: Sage Weil --- drivers/block/rbd.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c1f7701..a67fa63 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -935,7 +935,6 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_preferred = cpu_to_le32(-1); layout->fl_pg_pool = cpu_to_le32(dev->poolid); ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); -- cgit v1.1 From f8ad495a8a0277b88c59bf38319e5e944aaf5a4a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Apr 2012 15:49:44 -0500 Subject: rbd: use gfp_flags parameter in rbd_header_from_disk() We should use the gfp_flags that the caller specified instead of GFP_KERNEL here. There is only one caller and it uses GFP_KERNEL, so this change is just a cleanup and doesn't change how the code works. Signed-off-by: Dan Carpenter Reviewed-by: Alex Elder --- drivers/block/rbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a67fa63..ca59d4d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -506,11 +506,11 @@ static int rbd_header_from_disk(struct rbd_image_header *header, header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); if (snap_count) { header->snap_names = kmalloc(header->snap_names_len, - GFP_KERNEL); + gfp_flags); if (!header->snap_names) goto err_snapc; header->snap_sizes = kmalloc(snap_count * sizeof(u64), - GFP_KERNEL); + gfp_flags); if (!header->snap_sizes) goto err_names; } else { -- cgit v1.1 From 50f7c4c967d0b5acd8e7ba6ab654dc4a7ac869ac Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 20 Apr 2012 15:49:44 -0500 Subject: rbd: fix integer overflow in rbd_header_from_disk() ondisk->snap_count is read from disk via rbd_req_sync_read() and thus needs validation. Otherwise, a bogus `snap_count' could overflow the kmalloc() size, leading to memory corruption. Also use `u32' consistently for `snap_count'. [elder@dreamhost.com: changed to use UINT_MAX rather than ULONG_MAX] Signed-off-by: Xi Wang Reviewed-by: Alex Elder --- drivers/block/rbd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ca59d4d..a75fe93 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -487,16 +487,18 @@ static void rbd_coll_release(struct kref *kref) */ static int rbd_header_from_disk(struct rbd_image_header *header, struct rbd_image_header_ondisk *ondisk, - int allocated_snaps, + u32 allocated_snaps, gfp_t gfp_flags) { - int i; - u32 snap_count; + u32 i, snap_count; if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) return -ENXIO; snap_count = le32_to_cpu(ondisk->snap_count); + if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context)) + / sizeof (*ondisk)) + return -EINVAL; header->snapc = kmalloc(sizeof(struct ceph_snap_context) + snap_count * sizeof (*ondisk), gfp_flags); @@ -1591,7 +1593,7 @@ static int rbd_read_header(struct rbd_device *rbd_dev, { ssize_t rc; struct rbd_image_header_ondisk *dh; - int snap_count = 0; + u32 snap_count = 0; u64 ver; size_t len; -- cgit v1.1 From 403f24d3d51760a8b9368d595fa5f48c309f1a0f Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 5 Dec 2011 10:47:13 -0800 Subject: rbd: protect read of snapshot sequence number This is updated whenever a snapshot is added or deleted, and the snapc pointer is changed with every refresh of the header. Signed-off-by: Josh Durgin Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- drivers/block/rbd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a75fe93..5ab9f55 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1684,7 +1684,9 @@ static int rbd_header_add_snap(struct rbd_device *dev, if (ret < 0) return ret; - dev->header.snapc->seq = new_snapid; + down_write(&dev->header_rwsem); + dev->header.snapc->seq = new_snapid; + up_write(&dev->header_rwsem); return 0; bad: -- cgit v1.1 From 77dfe99fe3cb0b2b0545e19e2d57b7a9134ee3c0 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 21 Nov 2011 13:04:42 -0800 Subject: rbd: store snapshot id instead of index When a device was open at a snapshot, and snapshots were deleted or added, data from the wrong snapshot could be read. Instead of assuming the snap context is constant, store the actual snap id when the device is initialized, and rely on the OSDs to signal an error if we try reading from a snapshot that was deleted. Signed-off-by: Josh Durgin Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- drivers/block/rbd.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5ab9f55..c1650bd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -175,8 +175,7 @@ struct rbd_device { /* protects updating the header */ struct rw_semaphore header_rwsem; char snap_name[RBD_MAX_SNAP_NAME_LEN]; - u32 cur_snap; /* index+1 of current snapshot within snap context - 0 - for the head */ + u64 snap_id; /* current snapshot id */ int read_only; struct list_head node; @@ -554,21 +553,6 @@ err_snapc: return -ENOMEM; } -static int snap_index(struct rbd_image_header *header, int snap_num) -{ - return header->total_snaps - snap_num; -} - -static u64 cur_snap_id(struct rbd_device *rbd_dev) -{ - struct rbd_image_header *header = &rbd_dev->header; - - if (!rbd_dev->cur_snap) - return 0; - - return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; -} - static int snap_by_name(struct rbd_image_header *header, const char *snap_name, u64 *seq, u64 *size) { @@ -607,7 +591,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) snapc->seq = header->snap_seq; else snapc->seq = 0; - dev->cur_snap = 0; + dev->snap_id = CEPH_NOSNAP; dev->read_only = 0; if (size) *size = header->image_size; @@ -615,8 +599,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); if (ret < 0) goto done; - - dev->cur_snap = header->total_snaps - ret; + dev->snap_id = snapc->seq; dev->read_only = 1; } @@ -1522,7 +1505,7 @@ static void rbd_rq_fn(struct request_queue *q) coll, cur_seg); else rbd_req_read(rq, rbd_dev, - cur_snap_id(rbd_dev), + rbd_dev->snap_id, ofs, op_size, bio, coll, cur_seg); @@ -1657,7 +1640,7 @@ static int rbd_header_add_snap(struct rbd_device *dev, struct ceph_mon_client *monc; /* we should create a snapshot only if we're pointing at the head */ - if (dev->cur_snap) + if (dev->snap_id != CEPH_NOSNAP) return -EINVAL; monc = &dev->rbd_client->client->monc; -- cgit v1.1 From b06e6a6be796bc365a19b0ac5176b553c13abf2f Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 21 Nov 2011 18:16:52 -0800 Subject: rbd: remove conditional snapid parameters The snapid parameters passed to rbd_do_op() and rbd_req_sync_op() are now always either a valid snapid or an explicit CEPH_NOSNAP. [elder@dreamhost.com: Rephrased the description] Signed-off-by: Josh Durgin Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- drivers/block/rbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c1650bd..4a0a829f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1152,7 +1152,7 @@ static int rbd_req_read(struct request *rq, int coll_index) { return rbd_do_op(rq, rbd_dev, NULL, - (snapid ? snapid : CEPH_NOSNAP), + snapid, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2, @@ -1171,7 +1171,7 @@ static int rbd_req_sync_read(struct rbd_device *dev, u64 *ver) { return rbd_req_sync_op(dev, NULL, - (snapid ? snapid : CEPH_NOSNAP), + snapid, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, -- cgit v1.1 From 3591538fb272d2432d112d47d7e0ddd0be4cded2 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 5 Dec 2011 18:25:13 -0800 Subject: rbd: fix snapshot size type Snapshot sizes should be the same type as regular image sizes. This only affects their displayed size in sysfs, not the reported size of an actual block device sizes. Signed-off-by: Josh Durgin Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- drivers/block/rbd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4a0a829f..8359796 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -141,7 +141,7 @@ struct rbd_request { struct rbd_snap { struct device dev; const char *name; - size_t size; + u64 size; struct list_head node; u64 id; }; @@ -1935,7 +1935,7 @@ static ssize_t rbd_snap_size_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - return sprintf(buf, "%zd\n", snap->size); + return sprintf(buf, "%llu\n", (unsigned long long)snap->size); } static ssize_t rbd_snap_id_show(struct device *dev, @@ -1944,7 +1944,7 @@ static ssize_t rbd_snap_id_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - return sprintf(buf, "%llu\n", (unsigned long long) snap->id); + return sprintf(buf, "%llu\n", (unsigned long long)snap->id); } static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); -- cgit v1.1 From 263c6ca007a6693fb724a24c5a55716c49d33573 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 5 Dec 2011 10:43:42 -0800 Subject: rbd: rename __rbd_update_snaps to __rbd_refresh_header This function rereads the entire header and handles any changes in it, not just changes in snapshots. Signed-off-by: Josh Durgin Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- drivers/block/rbd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8359796..65665c9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -240,7 +240,7 @@ static void rbd_put_dev(struct rbd_device *rbd_dev) put_device(&rbd_dev->dev); } -static int __rbd_update_snaps(struct rbd_device *rbd_dev); +static int __rbd_refresh_header(struct rbd_device *rbd_dev); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -1222,7 +1222,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, notify_id, (int)opcode); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rc = __rbd_update_snaps(dev); + rc = __rbd_refresh_header(dev); mutex_unlock(&ctl_mutex); if (rc) pr_warning(RBD_DRV_NAME "%d got notification but failed to " @@ -1689,7 +1689,7 @@ static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) /* * only read the first part of the ondisk header, without the snaps info */ -static int __rbd_update_snaps(struct rbd_device *rbd_dev) +static int __rbd_refresh_header(struct rbd_device *rbd_dev) { int ret; struct rbd_image_header h; @@ -1876,7 +1876,7 @@ static ssize_t rbd_image_refresh(struct device *dev, mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rc = __rbd_update_snaps(rbd_dev); + rc = __rbd_refresh_header(rbd_dev); if (rc < 0) ret = rc; @@ -2159,7 +2159,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) rbd_dev->header.obj_version); if (ret == -ERANGE) { mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rc = __rbd_update_snaps(rbd_dev); + rc = __rbd_refresh_header(rbd_dev); mutex_unlock(&ctl_mutex); if (rc < 0) return rc; @@ -2544,7 +2544,7 @@ static ssize_t rbd_snap_add(struct device *dev, if (ret < 0) goto err_unlock; - ret = __rbd_update_snaps(rbd_dev); + ret = __rbd_refresh_header(rbd_dev); if (ret < 0) goto err_unlock; -- cgit v1.1