diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-17 16:03:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-17 16:03:12 -0800 |
commit | 57666509b70030a9483d13222bfec8eec5db07df (patch) | |
tree | 1e0021c2aabc2ce8832e8c816e2aa94b0b77a323 /fs/ceph/caps.c | |
parent | 87c31b39abcb6fb6bd7d111200c9627a594bf6a9 (diff) | |
parent | 0aeff37abada9f8c08d2b10481a43d3ae406c823 (diff) | |
download | op-kernel-dev-57666509b70030a9483d13222bfec8eec5db07df.zip op-kernel-dev-57666509b70030a9483d13222bfec8eec5db07df.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil:
"The big item here is support for inline data for CephFS and for
message signatures from Zheng. There are also several bug fixes,
including interrupted flock request handling, 0-length xattrs, mksnap,
cached readdir results, and a message version compat field. Finally
there are several cleanups from Ilya, Dan, and Markus.
Note that there is another series coming soon that fixes some bugs in
the RBD 'lingering' requests, but it isn't quite ready yet"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (27 commits)
ceph: fix setting empty extended attribute
ceph: fix mksnap crash
ceph: do_sync is never initialized
libceph: fixup includes in pagelist.h
ceph: support inline data feature
ceph: flush inline version
ceph: convert inline data to normal data before data write
ceph: sync read inline data
ceph: fetch inline data when getting Fcr cap refs
ceph: use getattr request to fetch inline data
ceph: add inline data to pagecache
ceph: parse inline data in MClientReply and MClientCaps
libceph: specify position of extent operation
libceph: add CREATE osd operation support
libceph: add SETXATTR/CMPXATTR osd operations support
rbd: don't treat CEPH_OSD_OP_DELETE as extent op
ceph: remove unused stringification macros
libceph: require cephx message signature by default
ceph: introduce global empty snap context
ceph: message versioning fixes
...
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 132 |
1 files changed, 102 insertions, 30 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cefca66..b93c631 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session, kuid_t uid, kgid_t gid, umode_t mode, u64 xattr_version, struct ceph_buffer *xattrs_buf, - u64 follows) + u64 follows, bool inline_data) { struct ceph_mds_caps *fc; struct ceph_msg *msg; + void *p; + size_t extra_len; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" " seq %u/%u mseq %u follows %lld size %llu/%llu" @@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false); + /* flock buffer size + inline version + inline data size */ + extra_len = 4 + 8 + 4; + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, + GFP_NOFS, false); if (!msg) return -ENOMEM; @@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session, fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid)); fc->mode = cpu_to_le32(mode); + p = fc + 1; + /* flock buffer size */ + ceph_encode_32(&p, 0); + /* inline version */ + ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE); + /* inline data size */ + ceph_encode_32(&p, 0); + fc->xattr_version = cpu_to_le64(xattr_version); if (xattrs_buf) { msg->middle = ceph_buffer_get(xattrs_buf); @@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, u64 flush_tid = 0; int i; int ret; + bool inline_data; held = cap->issued | cap->implemented; revoking = cap->implemented & ~cap->issued; @@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, xattr_version = ci->i_xattrs.version; } + inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + spin_unlock(&ci->i_ceph_lock); ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, xattr_blob, - follows); + follows, inline_data); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); delayed = 1; @@ -1336,7 +1352,7 @@ retry: capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, capsnap->xattr_version, capsnap->xattr_blob, - capsnap->follows); + capsnap->follows, capsnap->inline_data); next_follows = capsnap->follows + 1; ceph_put_cap_snap(capsnap); @@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) * requested from the MDS. */ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - int *got, loff_t endoff, int *check_max, int *err) + loff_t endoff, int *got, struct page **pinned_page, + int *check_max, int *err) { struct inode *inode = &ci->vfs_inode; int ret = 0; - int have, implemented; + int have, implemented, _got = 0; int file_wanted; dout("get_cap_refs %p need %s want %s\n", inode, ceph_cap_string(need), ceph_cap_string(want)); +again: spin_lock(&ci->i_ceph_lock); /* make sure file is actually open */ @@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ceph_cap_string(need), ceph_cap_string(file_wanted)); *err = -EBADF; ret = 1; - goto out; + goto out_unlock; } /* finish pending truncate */ @@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, *check_max = 1; ret = 1; } - goto out; + goto out_unlock; } /* * If a sync write is in progress, we must wait, so that we @@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, */ if (__ceph_have_pending_cap_snap(ci)) { dout("get_cap_refs %p cap_snap_pending\n", inode); - goto out; + goto out_unlock; } } @@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if ((revoking & not) == 0) { - *got = need | (have & want); - __take_cap_refs(ci, *got); + _got = need | (have & want); + __take_cap_refs(ci, _got); ret = 1; } } else { dout("get_cap_refs %p have %s needed %s\n", inode, ceph_cap_string(have), ceph_cap_string(need)); } -out: +out_unlock: spin_unlock(&ci->i_ceph_lock); + + if (ci->i_inline_version != CEPH_INLINE_NONE && + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + i_size_read(inode) > 0) { + int ret1; + struct page *page = find_get_page(inode->i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + *pinned_page = page; + goto out; + } + page_cache_release(page); + } + /* + * drop cap refs first because getattr while holding + * caps refs can cause deadlock. + */ + ceph_put_cap_refs(ci, _got); + _got = 0; + + /* getattr request will bring inline data into page cache */ + ret1 = __ceph_do_getattr(inode, NULL, + CEPH_STAT_CAP_INLINE_DATA, true); + if (ret1 >= 0) { + ret = 0; + goto again; + } + *err = ret1; + ret = 1; + } +out: dout("get_cap_refs %p ret %d got %s\n", inode, - ret, ceph_cap_string(*got)); + ret, ceph_cap_string(_got)); + *got = _got; return ret; } @@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff) * due to a small max_size, make sure we check_max_size (and possibly * ask the mds) so we don't get hung up indefinitely. */ -int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got, - loff_t endoff) +int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, + loff_t endoff, int *got, struct page **pinned_page) { int check_max, ret, err; @@ -2179,8 +2229,8 @@ retry: check_max = 0; err = 0; ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, - got, endoff, + try_get_cap_refs(ci, need, want, endoff, + got, pinned_page, &check_max, &err)); if (err) ret = err; @@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode) static void handle_cap_grant(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *grant, void *snaptrace, int snaptrace_len, + u64 inline_version, + void *inline_data, int inline_len, struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, struct ceph_cap *cap, int issued) @@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool queue_invalidate = false; bool queue_revalidate = false; bool deleted_inode = false; + bool fill_inline = false; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } BUG_ON(cap->issued & ~cap->implemented); + if (inline_version > 0 && inline_version >= ci->i_inline_version) { + ci->i_inline_version = inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO))) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { @@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, wake = true; } + if (fill_inline) + ceph_fill_inline_data(inode, NULL, inline_data, inline_len); + if (queue_trunc) { ceph_queue_vmtruncate(inode); ceph_queue_revalidate(inode); @@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, u64 cap_id; u64 size, max_size; u64 tid; + u64 inline_version = 0; + void *inline_data = NULL; + u32 inline_len = 0; void *snaptrace; size_t snaptrace_len; - void *flock; - void *end; - u32 flock_len; + void *p, *end; dout("handle_caps from mds%d\n", mds); @@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session, snaptrace = h + 1; snaptrace_len = le32_to_cpu(h->snap_trace_len); + p = snaptrace + snaptrace_len; if (le16_to_cpu(msg->hdr.version) >= 2) { - void *p = snaptrace + snaptrace_len; + u32 flock_len; ceph_decode_32_safe(&p, end, flock_len, bad); if (p + flock_len > end) goto bad; - flock = p; - } else { - flock = NULL; - flock_len = 0; + p += flock_len; } if (le16_to_cpu(msg->hdr.version) >= 3) { if (op == CEPH_CAP_OP_IMPORT) { - void *p = flock + flock_len; if (p + sizeof(*peer) > end) goto bad; peer = p; + p += sizeof(*peer); } else if (op == CEPH_CAP_OP_EXPORT) { /* recorded in unused fields */ peer = (void *)&h->size; } } + if (le16_to_cpu(msg->hdr.version) >= 4) { + ceph_decode_64_safe(&p, end, inline_version, bad); + ceph_decode_32_safe(&p, end, inline_len, bad); + if (p + inline_len > end) + goto bad; + inline_data = p; + p += inline_len; + } + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); @@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, handle_cap_import(mdsc, inode, h, peer, session, &cap, &issued); handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + inline_version, inline_data, inline_len, msg->middle, session, cap, issued); goto done_unlocked; } @@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_GRANT: __ceph_caps_issued(ci, &issued); issued |= __ceph_caps_dirty(ci); - handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, - session, cap, issued); + handle_cap_grant(mdsc, inode, h, NULL, 0, + inline_version, inline_data, inline_len, + msg->middle, session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: @@ -3137,8 +3210,7 @@ flush_cap_releases: done: mutex_unlock(&session->s_mutex); done_unlocked: - if (inode) - iput(inode); + iput(inode); return; bad: |