diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-26 15:53:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-26 15:53:16 -0700 |
commit | d5a38f6e4668b3110a66cd96ce2096184bf66def (patch) | |
tree | bd2209de25a343e7b01d143abce7bf774122227f /net/ceph/osd_client.c | |
parent | 698f415cf5756e320623bdb015a600945743377c (diff) | |
parent | 5ee61e95b6b33c82f6fa1382585faed66aa01245 (diff) | |
download | op-kernel-dev-d5a38f6e4668b3110a66cd96ce2096184bf66def.zip op-kernel-dev-d5a38f6e4668b3110a66cd96ce2096184bf66def.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
"There is quite a bit here, including some overdue refactoring and
cleanup on the mon_client and osd_client code from Ilya, scattered
writeback support for CephFS and a pile of bug fixes from Zheng, and a
few random cleanups and fixes from others"
[ I already decided not to pull this because of it having been rebased
recently, but ended up changing my mind after all. Next time I'll
really hold people to it. Oh well. - Linus ]
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (34 commits)
libceph: use KMEM_CACHE macro
ceph: use kmem_cache_zalloc
rbd: use KMEM_CACHE macro
ceph: use lookup request to revalidate dentry
ceph: kill ceph_get_dentry_parent_inode()
ceph: fix security xattr deadlock
ceph: don't request vxattrs from MDS
ceph: fix mounting same fs multiple times
ceph: remove unnecessary NULL check
ceph: avoid updating directory inode's i_size accidentally
ceph: fix race during filling readdir cache
libceph: use sizeof_footer() more
ceph: kill ceph_empty_snapc
ceph: fix a wrong comparison
ceph: replace CURRENT_TIME by current_fs_time()
ceph: scattered page writeback
libceph: add helper that duplicates last extent operation
libceph: enable large, variable-sized OSD requests
libceph: osdc->req_mempool should be backed by a slab pool
libceph: make r_request msg_size calculation clearer
...
Diffstat (limited to 'net/ceph/osd_client.c')
-rw-r--r-- | net/ceph/osd_client.c | 109 |
1 files changed, 73 insertions, 36 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5bc0537..32355d9d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref) ceph_put_snap_context(req->r_snapc); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); - else + else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS) kmem_cache_free(ceph_osd_request_cache, req); - + else + kfree(req); } void ceph_osdc_get_request(struct ceph_osd_request *req) @@ -369,28 +370,22 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_msg *msg; size_t msg_size; - BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX); - BUG_ON(num_ops > CEPH_OSD_MAX_OP); - - msg_size = 4 + 4 + 8 + 8 + 4+8; - msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ - msg_size += 1 + 8 + 4 + 4; /* pg_t */ - msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */ - msg_size += 2 + num_ops*sizeof(struct ceph_osd_op); - msg_size += 8; /* snapid */ - msg_size += 8; /* snap_seq */ - msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */ - msg_size += 4; - if (use_mempool) { + BUG_ON(num_ops > CEPH_OSD_SLAB_OPS); req = mempool_alloc(osdc->req_mempool, gfp_flags); - memset(req, 0, sizeof(*req)); + } else if (num_ops <= CEPH_OSD_SLAB_OPS) { + req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags); } else { - req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags); + BUG_ON(num_ops > CEPH_OSD_MAX_OPS); + req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]), + gfp_flags); } - if (req == NULL) + if (unlikely(!req)) return NULL; + /* req only, each op is zeroed in _osd_req_op_init() */ + memset(req, 0, sizeof(*req)); + req->r_osdc = osdc; req->r_mempool = use_mempool; req->r_num_ops = num_ops; @@ -408,18 +403,36 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, req->r_base_oloc.pool = -1; req->r_target_oloc.pool = -1; + msg_size = OSD_OPREPLY_FRONT_LEN; + if (num_ops > CEPH_OSD_SLAB_OPS) { + /* ceph_osd_op and rval */ + msg_size += (num_ops - CEPH_OSD_SLAB_OPS) * + (sizeof(struct ceph_osd_op) + 4); + } + /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, gfp_flags, true); + msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, + gfp_flags, true); if (!msg) { ceph_osdc_put_request(req); return NULL; } req->r_reply = msg; + msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */ + msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */ + msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ + msg_size += 1 + 8 + 4 + 4; /* pgid */ + msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */ + msg_size += 2 + num_ops * sizeof(struct ceph_osd_op); + msg_size += 8; /* snapid */ + msg_size += 8; /* snap_seq */ + msg_size += 4 + 8 * (snapc ? snapc->num_snaps : 0); /* snaps */ + msg_size += 4; /* retry_attempt */ + /* create request message; allow space for oid */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); @@ -498,7 +511,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL) payload_len += length; - op->payload_len = payload_len; + op->indata_len = payload_len; } EXPORT_SYMBOL(osd_req_op_extent_init); @@ -517,10 +530,32 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req, BUG_ON(length > previous); op->extent.length = length; - op->payload_len -= previous - length; + op->indata_len -= previous - length; } EXPORT_SYMBOL(osd_req_op_extent_update); +void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, + unsigned int which, u64 offset_inc) +{ + struct ceph_osd_req_op *op, *prev_op; + + BUG_ON(which + 1 >= osd_req->r_num_ops); + + prev_op = &osd_req->r_ops[which]; + op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); + /* dup previous one */ + op->indata_len = prev_op->indata_len; + op->outdata_len = prev_op->outdata_len; + op->extent = prev_op->extent; + /* adjust offset */ + op->extent.offset += offset_inc; + op->extent.length -= offset_inc; + + if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) + op->indata_len -= offset_inc; +} +EXPORT_SYMBOL(osd_req_op_extent_dup_last); + void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method) { @@ -554,7 +589,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, op->cls.argc = 0; /* currently unused */ - op->payload_len = payload_len; + op->indata_len = payload_len; } EXPORT_SYMBOL(osd_req_op_cls_init); @@ -587,7 +622,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, op->xattr.cmp_mode = cmp_mode; ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); - op->payload_len = payload_len; + op->indata_len = payload_len; return 0; } EXPORT_SYMBOL(osd_req_op_xattr_init); @@ -707,7 +742,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE); dst->cls.indata_len = cpu_to_le32(data_length); ceph_osdc_msg_data_add(req->r_request, osd_data); - src->payload_len += data_length; + src->indata_len += data_length; request_data_len += data_length; } osd_data = &src->cls.response_data; @@ -750,7 +785,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, dst->op = cpu_to_le16(src->op); dst->flags = cpu_to_le32(src->flags); - dst->payload_len = cpu_to_le32(src->payload_len); + dst->payload_len = cpu_to_le32(src->indata_len); return request_data_len; } @@ -1810,7 +1845,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_decode_need(&p, end, 4, bad_put); numops = ceph_decode_32(&p); - if (numops > CEPH_OSD_MAX_OP) + if (numops > CEPH_OSD_MAX_OPS) goto bad_put; if (numops != req->r_num_ops) goto bad_put; @@ -1821,7 +1856,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) int len; len = le32_to_cpu(op->payload_len); - req->r_reply_op_len[i] = len; + req->r_ops[i].outdata_len = len; dout(" op %d has %d bytes\n", i, len); payload_len += len; p += sizeof(*op); @@ -1836,7 +1871,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_decode_need(&p, end, 4 + numops * 4, bad_put); retry_attempt = ceph_decode_32(&p); for (i = 0; i < numops; i++) - req->r_reply_op_result[i] = ceph_decode_32(&p); + req->r_ops[i].rval = ceph_decode_32(&p); if (le16_to_cpu(msg->hdr.version) >= 6) { p += 8 + 4; /* skip replay_version */ @@ -2187,7 +2222,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) goto bad; done: downgrade_write(&osdc->map_sem); - ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); + ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP, + osdc->osdmap->epoch); /* * subscribe to subsequent osdmap updates if full to ensure @@ -2646,8 +2682,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) round_jiffies_relative(osdc->client->options->osd_idle_ttl)); err = -ENOMEM; - osdc->req_mempool = mempool_create_kmalloc_pool(10, - sizeof(struct ceph_osd_request)); + osdc->req_mempool = mempool_create_slab_pool(10, + ceph_osd_request_cache); if (!osdc->req_mempool) goto out; @@ -2782,11 +2818,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages); int ceph_osdc_setup(void) { + size_t size = sizeof(struct ceph_osd_request) + + CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); + BUG_ON(ceph_osd_request_cache); - ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", - sizeof (struct ceph_osd_request), - __alignof__(struct ceph_osd_request), - 0, NULL); + ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size, + 0, 0, NULL); return ceph_osd_request_cache ? 0 : -ENOMEM; } |