diff options
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r-- | fs/ceph/mds_client.c | 136 |
1 files changed, 107 insertions, 29 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bad07c0..a92d3f5 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/utsname.h> #include "super.h" #include "mds_client.h" @@ -334,7 +335,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) /* * sessions */ -static const char *session_state_name(int s) +const char *ceph_session_state_name(int s) { switch (s) { case CEPH_MDS_SESSION_NEW: return "new"; @@ -542,6 +543,8 @@ void ceph_mdsc_release_request(struct kref *kref) } kfree(req->r_path1); kfree(req->r_path2); + if (req->r_pagelist) + ceph_pagelist_release(req->r_pagelist); put_request_session(req); ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); kfree(req); @@ -812,6 +815,74 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) h = msg->front.iov_base; h->op = cpu_to_le32(op); h->seq = cpu_to_le64(seq); + + return msg; +} + +/* + * session message, specialization for CEPH_SESSION_REQUEST_OPEN + * to include additional client metadata fields. + */ +static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq) +{ + struct ceph_msg *msg; + struct ceph_mds_session_head *h; + int i = -1; + int metadata_bytes = 0; + int metadata_key_count = 0; + struct ceph_options *opt = mdsc->fsc->client->options; + void *p; + + const char* metadata[3][2] = { + {"hostname", utsname()->nodename}, + {"entity_id", opt->name ? opt->name : ""}, + {NULL, NULL} + }; + + /* Calculate serialized length of metadata */ + metadata_bytes = 4; /* map length */ + for (i = 0; metadata[i][0] != NULL; ++i) { + metadata_bytes += 8 + strlen(metadata[i][0]) + + strlen(metadata[i][1]); + metadata_key_count++; + } + + /* Allocate the message */ + msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes, + GFP_NOFS, false); + if (!msg) { + pr_err("create_session_msg ENOMEM creating msg\n"); + return NULL; + } + h = msg->front.iov_base; + h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); + h->seq = cpu_to_le64(seq); + + /* + * Serialize client metadata into waiting buffer space, using + * the format that userspace expects for map<string, string> + */ + msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */ + + /* The write pointer, following the session_head structure */ + p = msg->front.iov_base + sizeof(*h); + + /* Number of entries in the map */ + ceph_encode_32(&p, metadata_key_count); + + /* Two length-prefixed strings for each entry in the map */ + for (i = 0; metadata[i][0] != NULL; ++i) { + size_t const key_len = strlen(metadata[i][0]); + size_t const val_len = strlen(metadata[i][1]); + + ceph_encode_32(&p, key_len); + memcpy(p, metadata[i][0], key_len); + p += key_len; + ceph_encode_32(&p, val_len); + memcpy(p, metadata[i][1], val_len); + p += val_len; + } + return msg; } @@ -835,7 +906,7 @@ static int __open_session(struct ceph_mds_client *mdsc, session->s_renew_requested = jiffies; /* send connect message */ - msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); + msg = create_session_open_msg(mdsc, session->s_seq); if (!msg) return -ENOMEM; ceph_con_send(&session->s_con, msg); @@ -1164,7 +1235,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc, struct ceph_msg *msg; dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", - session->s_mds, session_state_name(session->s_state), seq); + session->s_mds, ceph_session_state_name(session->s_state), seq); msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); if (!msg) return -ENOMEM; @@ -1216,7 +1287,7 @@ static int request_close_session(struct ceph_mds_client *mdsc, struct ceph_msg *msg; dout("request_close_session mds%d state %s seq %lld\n", - session->s_mds, session_state_name(session->s_state), + session->s_mds, ceph_session_state_name(session->s_state), session->s_seq); msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); if (!msg) @@ -1847,13 +1918,15 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - if (req->r_data_len) { - /* outbound data set only by ceph_sync_setxattr() */ - BUG_ON(!req->r_pages); - ceph_msg_data_add_pages(msg, req->r_pages, req->r_data_len, 0); + if (req->r_pagelist) { + struct ceph_pagelist *pagelist = req->r_pagelist; + atomic_inc(&pagelist->refcnt); + ceph_msg_data_add_pagelist(msg, pagelist); + msg->hdr.data_len = cpu_to_le32(pagelist->length); + } else { + msg->hdr.data_len = 0; } - msg->hdr.data_len = cpu_to_le32(req->r_data_len); msg->hdr.data_off = cpu_to_le16(0); out_free2: @@ -2007,7 +2080,7 @@ static int __do_request(struct ceph_mds_client *mdsc, req->r_session = get_session(session); dout("do_request mds%d session %p state %s\n", mds, session, - session_state_name(session->s_state)); + ceph_session_state_name(session->s_state)); if (session->s_state != CEPH_MDS_SESSION_OPEN && session->s_state != CEPH_MDS_SESSION_HUNG) { if (session->s_state == CEPH_MDS_SESSION_NEW || @@ -2078,6 +2151,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) if (req->r_session && req->r_session->s_mds == mds) { dout(" kicking tid %llu\n", req->r_tid); + list_del_init(&req->r_wait); __do_request(mdsc, req); } } @@ -2444,7 +2518,7 @@ static void handle_session(struct ceph_mds_session *session, dout("handle_session mds%d %s %p state %s seq %llu\n", mds, ceph_session_op_name(op), session, - session_state_name(session->s_state), seq); + ceph_session_state_name(session->s_state), seq); if (session->s_state == CEPH_MDS_SESSION_HUNG) { session->s_state = CEPH_MDS_SESSION_OPEN; @@ -2471,9 +2545,8 @@ static void handle_session(struct ceph_mds_session *session, if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) pr_info("mds%d reconnect denied\n", session->s_mds); remove_session_caps(session); - wake = 1; /* for good measure */ + wake = 2; /* for good measure */ wake_up_all(&mdsc->session_close_wq); - kick_requests(mdsc, mds); break; case CEPH_SESSION_STALE: @@ -2503,6 +2576,8 @@ static void handle_session(struct ceph_mds_session *session, if (wake) { mutex_lock(&mdsc->mutex); __wake_requests(mdsc, &session->s_waiting); + if (wake == 2) + kick_requests(mdsc, mds); mutex_unlock(&mdsc->mutex); } return; @@ -2695,18 +2770,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; - ceph_con_close(&session->s_con); - ceph_con_open(&session->s_con, - CEPH_ENTITY_TYPE_MDS, mds, - ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); - - /* replay unsafe requests */ - replay_unsafe_requests(mdsc, session); - - down_read(&mdsc->snap_rwsem); - dout("session %p state %s\n", session, - session_state_name(session->s_state)); + ceph_session_state_name(session->s_state)); spin_lock(&session->s_gen_ttl_lock); session->s_cap_gen++; @@ -2723,6 +2788,19 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, discard_cap_releases(mdsc, session); spin_unlock(&session->s_cap_lock); + /* trim unused caps to reduce MDS's cache rejoin time */ + shrink_dcache_parent(mdsc->fsc->sb->s_root); + + ceph_con_close(&session->s_con); + ceph_con_open(&session->s_con, + CEPH_ENTITY_TYPE_MDS, mds, + ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); + + /* replay unsafe requests */ + replay_unsafe_requests(mdsc, session); + + down_read(&mdsc->snap_rwsem); + /* traverse this session's caps */ s_nr_caps = session->s_nr_caps; err = ceph_pagelist_encode_32(pagelist, s_nr_caps); @@ -2791,7 +2869,6 @@ fail: mutex_unlock(&session->s_mutex); fail_nomsg: ceph_pagelist_release(pagelist); - kfree(pagelist); fail_nopagelist: pr_err("error %d preparing reconnect for mds%d\n", err, mds); return; @@ -2827,7 +2904,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", ceph_mds_state_name(newstate), ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", - session_state_name(s->s_state)); + ceph_session_state_name(s->s_state)); if (i >= newmap->m_max_mds || memcmp(ceph_mdsmap_get_addr(oldmap, i), @@ -2939,14 +3016,15 @@ static void handle_lease(struct ceph_mds_client *mdsc, if (dname.len != get_unaligned_le32(h+1)) goto bad; - mutex_lock(&session->s_mutex); - session->s_seq++; - /* lookup inode */ inode = ceph_find_inode(sb, vino); dout("handle_lease %s, ino %llx %p %.*s\n", ceph_lease_op_name(h->action), vino.ino, inode, dname.len, dname.name); + + mutex_lock(&session->s_mutex); + session->s_seq++; + if (inode == NULL) { dout("handle_lease no inode %llx\n", vino.ino); goto release; |