diff options
author | Ilya Dryomov <ilya.dryomov@inktank.com> | 2014-01-27 17:40:19 +0200 |
---|---|---|
committer | Ilya Dryomov <ilya.dryomov@inktank.com> | 2014-01-27 23:57:45 +0200 |
commit | 17a13e4028e6ad7ded079cf32370c47bd0e0fc07 (patch) | |
tree | b09232ed5e1037f4ff9cfd3c60902dd0c0d61973 | |
parent | ce7f6a2790464047199f54b66420243d433142bd (diff) | |
download | op-kernel-dev-17a13e4028e6ad7ded079cf32370c47bd0e0fc07.zip op-kernel-dev-17a13e4028e6ad7ded079cf32370c47bd0e0fc07.tar.gz |
libceph: follow {read,write}_tier fields on osd request submission
Overwrite ceph_osd_request::r_oloc.pool with read_tier for read ops and
write_tier for write and read+write ops (aka basic tiering support).
{read,write}_tier are part of pg_pool_t since v9. This commit bumps
our pg_pool_t decode compat version from v7 to v9, all new fields
except for {read,write}_tier are ignored.
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | include/linux/ceph/osdmap.h | 2 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 30 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 28 |
3 files changed, 55 insertions, 5 deletions
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 7f894a6..49ff69f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -35,6 +35,8 @@ struct ceph_pg_pool_info { u8 object_hash; u32 pg_num, pgp_num; int pg_num_mask, pgp_num_mask; + s64 read_tier; + s64 write_tier; /* wins for read+write ops */ u64 flags; char *name; }; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 10360de..0eb009e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1250,6 +1250,32 @@ static bool __req_should_be_paused(struct ceph_osd_client *osdc, } /* + * Calculate mapping of a request to a PG. Takes tiering into account. + */ +static int __calc_request_pg(struct ceph_osdmap *osdmap, + struct ceph_osd_request *req, + struct ceph_pg *pg_out) +{ + if ((req->r_flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) { + struct ceph_pg_pool_info *pi; + + pi = ceph_pg_pool_by_id(osdmap, req->r_oloc.pool); + if (pi) { + if ((req->r_flags & CEPH_OSD_FLAG_READ) && + pi->read_tier >= 0) + req->r_oloc.pool = pi->read_tier; + if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && + pi->write_tier >= 0) + req->r_oloc.pool = pi->write_tier; + } + /* !pi is caught in ceph_oloc_oid_to_pg() */ + } + + return ceph_oloc_oid_to_pg(osdmap, &req->r_oloc, + &req->r_oid, pg_out); +} + +/* * Pick an osd (the first 'up' osd in the pg), allocate the osd struct * (as needed), and set the request r_osd appropriately. If there is * no up osd, set r_osd to NULL. Move the request to the appropriate list @@ -1269,8 +1295,8 @@ static int __map_request(struct ceph_osd_client *osdc, bool was_paused; dout("map_request %p tid %lld\n", req, req->r_tid); - err = ceph_oloc_oid_to_pg(osdc->osdmap, &req->r_oloc, &req->r_oid, - &pgid); + + err = __calc_request_pg(osdc->osdmap, req, &pgid); if (err) { list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index d69d2355..aade4a5 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -519,8 +519,8 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); return -EINVAL; } - if (cv > 7) { - pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); + if (cv > 9) { + pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv); return -EINVAL; } len = ceph_decode_32(p); @@ -548,12 +548,34 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) *p += len; } - /* skip removed snaps */ + /* skip removed_snaps */ num = ceph_decode_32(p); *p += num * (8 + 8); *p += 8; /* skip auid */ pi->flags = ceph_decode_64(p); + *p += 4; /* skip crash_replay_interval */ + + if (ev >= 7) + *p += 1; /* skip min_size */ + + if (ev >= 8) + *p += 8 + 8; /* skip quota_max_* */ + + if (ev >= 9) { + /* skip tiers */ + num = ceph_decode_32(p); + *p += num * 8; + + *p += 8; /* skip tier_of */ + *p += 1; /* skip cache_mode */ + + pi->read_tier = ceph_decode_64(p); + pi->write_tier = ceph_decode_64(p); + } else { + pi->read_tier = -1; + pi->write_tier = -1; + } /* ignore the rest */ |