From 79aec9844de339531f05b019644ccaf5dd777144 Mon Sep 17 00:00:00 2001 From: Sam Lang Date: Wed, 19 Dec 2012 09:44:23 -1000 Subject: ceph: Check for err on mds request in atomic_open The error returned by ceph_mdsc_do_request includes errors sending the request, errors on timeout, or any errors coming from the mds. If ceph_mdsc_do_request returns an error, the reply struct will most likely be bogus. We need to bail out and propogate the error instead of overwriting it. Signed-off-by: Sam Lang Reviewed-by: Sage Weil --- fs/ceph/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d415096..2c71cbd 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, req); + if (err) + goto out_err; + err = ceph_handle_snapdir(req, dentry, err); if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); -- cgit v1.1 From 6e8575faa8fa680d59404a4d58d12190667be815 Mon Sep 17 00:00:00 2001 From: Sam Lang Date: Fri, 28 Dec 2012 09:56:46 -0800 Subject: ceph: Check for created flag in response from mds The mds now sends back a created inode if the create request performed the create. If the file already existed, no inode is returned in the reply. This allows ceph to set the created flag in atomic_open so that permissions are properly checked in the case that the file wasn't created by the create call to the mds. To ensure compability with previous kernels, a feature for sending back the inode in the create reply was added, so that the mds will only send back the inode if the client indicates it supports the feature. Signed-off-by: Sam Lang Reviewed-by: Sage Weil --- fs/ceph/file.c | 3 +++ fs/ceph/mds_client.c | 33 +++++++++++++++++++++++++++++++-- fs/ceph/mds_client.h | 6 ++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 2c71cbd..22b5b71 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -266,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = finish_no_open(file, dn); } else { dout("atomic_open finish_open on dn %p\n", dn); + if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { + *opened |= FILE_CREATED; + } err = finish_open(file, dentry, ceph_open, opened); } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9165eb8..d958420 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -233,6 +233,30 @@ bad: } /* + * parse create results + */ +static int parse_reply_info_create(void **p, void *end, + struct ceph_mds_reply_info_parsed *info, + int features) +{ + if (features & CEPH_FEATURE_REPLY_CREATE_INODE) { + if (*p == end) { + info->has_create_ino = false; + } else { + info->has_create_ino = true; + info->ino = ceph_decode_64(p); + } + } + + if (unlikely(*p != end)) + goto bad; + return 0; + +bad: + return -EIO; +} + +/* * parse extra results */ static int parse_reply_info_extra(void **p, void *end, @@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end, { if (info->head->op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else + else if (info->head->op == CEPH_MDS_OP_READDIR) return parse_reply_info_dir(p, end, info, features); + else if (info->head->op == CEPH_MDS_OP_CREATE) + return parse_reply_info_create(p, end, info, features); + else + return -EIO; } /* @@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&req->r_fill_mutex); err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { - if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && + if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || + req->r_op == CEPH_MDS_OP_LSSNAP) && rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index dd26846..567f7c6 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed { struct ceph_mds_reply_info_in *dir_in; u8 dir_complete, dir_end; }; + + /* for create results */ + struct { + bool has_create_ino; + u64 ino; + }; }; /* encoded blob describing snapshot contexts for certain -- cgit v1.1 From 8a92a119b292012a9bd920b908c3e9f1c512291d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 4 Jan 2013 14:28:07 +0800 Subject: ceph: move dirty inode to migrating list when clearing auth caps Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- fs/ceph/caps.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a1d9bb3..a9fe2d5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -611,8 +611,16 @@ retry: if (flags & CEPH_CAP_FLAG_AUTH) ci->i_auth_cap = cap; - else if (ci->i_auth_cap == cap) + else if (ci->i_auth_cap == cap) { ci->i_auth_cap = NULL; + spin_lock(&mdsc->cap_dirty_lock); + if (!list_empty(&ci->i_dirty_item)) { + dout(" moving %p to cap_dirty_migrating\n", inode); + list_move(&ci->i_dirty_item, + &mdsc->cap_dirty_migrating); + } + spin_unlock(&mdsc->cap_dirty_lock); + } dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", inode, ceph_vinop(inode), cap, ceph_cap_string(issued), -- cgit v1.1 From 395c312b9c535d57db122cbb5b7292223561d0b8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 4 Jan 2013 14:37:57 +0800 Subject: ceph: allow revoking duplicated caps issued by non-auth MDS Allow revoking duplicated caps issued by non-auth MDS if these caps are also issued by auth MDS. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- fs/ceph/caps.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a9fe2d5..76b1923 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1468,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; - int file_wanted, used; + int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; int mds = -1; /* keep track of how far we've gone through i_caps list @@ -1571,9 +1571,14 @@ retry_locked: /* NOTE: no side-effects allowed, until we take s_mutex */ + cap_used = used; + if (ci->i_auth_cap && cap != ci->i_auth_cap) + cap_used &= ~ci->i_auth_cap->issued; + revoking = cap->implemented & ~cap->issued; - dout(" mds%d cap %p issued %s implemented %s revoking %s\n", + dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", cap->mds, cap, ceph_cap_string(cap->issued), + ceph_cap_string(cap_used), ceph_cap_string(cap->implemented), ceph_cap_string(revoking)); @@ -1601,7 +1606,7 @@ retry_locked: } /* completed revocation? going down and there are no caps? */ - if (revoking && (revoking & used) == 0) { + if (revoking && (revoking & cap_used) == 0) { dout("completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; @@ -1678,8 +1683,8 @@ ack: sent++; /* __send_cap drops i_ceph_lock */ - delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, - retain, flushing, NULL); + delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, + want, retain, flushing, NULL); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } -- cgit v1.1 From 66f58691c5c820283dd7e4d6fe8649033ed43ceb Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 4 Jan 2013 14:45:18 +0800 Subject: ceph: allocate cap_release message when receiving cap import When client wants to release an imported cap, it's possible there is no reserved cap_release message in corresponding mds session. so __queue_cap_release causes kernel panic. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- fs/ceph/caps.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 76b1923..40b5bbe 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2833,6 +2833,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, (unsigned)seq); + if (op == CEPH_CAP_OP_IMPORT) + ceph_add_cap_releases(mdsc, session); + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); -- cgit v1.1 From 390306c38dd43908f7f7730229999790a773d1d5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 4 Jan 2013 15:30:10 +0800 Subject: ceph: check mds_wanted for imported cap The MDS may have incorrect wanted caps after importing caps. So the client should check the value mds has and send cap update if necessary. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- fs/ceph/caps.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 40b5bbe..1e1e020 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2429,7 +2429,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, dout("mds wanted %s -> %s\n", ceph_cap_string(le32_to_cpu(grant->wanted)), ceph_cap_string(wanted)); - grant->wanted = cpu_to_le32(wanted); + /* imported cap may not have correct mds_wanted */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) + check_caps = 1; } cap->seq = seq; -- cgit v1.1 From e8afad656cbcd06d02a7bacd4b318fa0e2907de0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:19 -0600 Subject: libceph: pass length to ceph_calc_file_object_mapping() ceph_calc_file_object_mapping() takes (among other things) a "file" offset and length, and based on the layout, determines the object number ("bno") backing the affected portion of the file's data and the offset into that object where the desired range begins. It also computes the size that should be used for the request--either the amount requested or something less if that would exceed the end of the object. This patch changes the input length parameter in this function so it is used only for input. That is, the argument will be passed by value rather than by address, so the value provided won't get updated by the function. The value would only get updated if the length would surpass the current object, and in that case the value it got updated to would be exactly that returned in *oxlen. Only one of the two callers is affected by this change. Update ceph_calc_raw_layout() so it records any updated value. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 36549a4..3b22150 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -194,7 +194,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->map_sem); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); if (r < 0) -- cgit v1.1 From 39b648d9ec7d4ab0b4362872c6284a12c582afa6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 31 Jan 2013 11:53:05 -0800 Subject: ceph: remove 'ceph.layout' virtual xattr This has been deprecated since v3.3, 114fc474. Kill it. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2c2ae5b..c2048b1 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -123,13 +123,6 @@ static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, static struct ceph_vxattr ceph_file_vxattrs[] = { XATTR_NAME_CEPH(file, layout), - /* The following extended attribute name is deprecated */ - { - .name = XATTR_CEPH_PREFIX "layout", - .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), - .getxattr_cb = ceph_vxattrcb_file_layout, - .readonly = true, - }, { 0 } /* Required table terminator */ }; static size_t ceph_file_vxattrs_name_size; /* total size of all names */ -- cgit v1.1 From 8860147a01c4243f64f7d602dbf8342ca616ed45 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 31 Jan 2013 11:53:27 -0800 Subject: ceph: support hidden vxattrs Add ability to flag virtual xattrs as hidden, such that you can getxattr them but they do not appear in listxattr. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index c2048b1..43063d0 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -29,7 +29,7 @@ struct ceph_vxattr { size_t name_size; /* strlen(name) + 1 (for '\0') */ size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, size_t size); - bool readonly; + bool readonly, hidden; }; /* directories */ @@ -85,13 +85,14 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name -#define XATTR_NAME_CEPH(_type, _name) \ - { \ - .name = CEPH_XATTR_NAME(_type, _name), \ - .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ - .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ - .readonly = true, \ - } +#define XATTR_NAME_CEPH(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .readonly = true, \ + .hidden = false, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, entries), @@ -157,7 +158,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) size_t size = 0; for (vxattr = vxattrs; vxattr->name; vxattr++) - size += vxattr->name_size; + if (!vxattr->hidden) + size += vxattr->name_size; return size; } -- cgit v1.1 From 3adf654ddbc355c23d75c6684128d4b067a7b792 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 31 Jan 2013 11:53:41 -0800 Subject: ceph: pass unhandled ceph.* setxattrs through to MDS If we do not specifically understand a setxattr on a ceph.* virtual xattr, send it through to the MDS. This allows us to implement new functionality via the MDS without direct support on the client side. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 43063d0..edc47de 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -777,6 +777,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name, if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ + if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + /* preallocate memory for xattr name, value, index node */ err = -ENOMEM; newname = kmemdup(name, name_len + 1, GFP_NOFS); @@ -833,6 +837,7 @@ retry: do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_sync_setxattr(dentry, name, value, size, flags); out: kfree(newname); -- cgit v1.1 From d421acb1ad7dfa31b7463b67f1593714b0b727c3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 20 Jan 2013 21:55:30 -0800 Subject: ceph: pass ceph.* removexattrs through to MDS If we do not explicitly recognized a vxattr (e.g., as readonly), pass the request through to the MDS and deal with it there. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index edc47de..234270f 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -892,6 +892,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name) if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ + if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: @@ -931,6 +935,7 @@ retry: return err; do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_send_removexattr(dentry, name); out: return err; -- cgit v1.1 From f36e4472969a78ae65e514b553e9a0feacb40a28 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 20 Jan 2013 21:59:29 -0800 Subject: ceph: add exists_cb to vxattr struct Allow for a callback to dynamically determine if a vxattr exists for the given inode. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 234270f..06344da 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -30,6 +30,7 @@ struct ceph_vxattr { size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, size_t size); bool readonly, hidden; + bool (*exists_cb)(struct ceph_inode_info *ci); }; /* directories */ @@ -92,6 +93,7 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ .readonly = true, \ .hidden = false, \ + .exists_cb = NULL, \ } static struct ceph_vxattr ceph_dir_vxattrs[] = { -- cgit v1.1 From 0bee82fb4b8d49541fe474ed460d2b917f329568 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 20 Jan 2013 22:00:58 -0800 Subject: ceph: fix getxattr vxattr handling Change the vxattr handling for getxattr so that vxattrs are checked prior to any xattr content, and never after. Enforce vxattr existence via the exists_cb callback. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 06344da..87b85f3 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -569,13 +569,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, if (!ceph_is_valid_xattr(name)) return -ENODATA; - /* let's see if a virtual xattr was requested */ - vxattr = ceph_match_vxattr(inode, name); - spin_lock(&ci->i_ceph_lock); dout("getxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); + /* let's see if a virtual xattr was requested */ + vxattr = ceph_match_vxattr(inode, name); + if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { + err = vxattr->getxattr_cb(ci, value, size); + goto out; + } + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { goto get_xattr; @@ -589,11 +593,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, spin_lock(&ci->i_ceph_lock); - if (vxattr && vxattr->readonly) { - err = vxattr->getxattr_cb(ci, value, size); - goto out; - } - err = __build_xattrs(inode); if (err < 0) goto out; @@ -601,11 +600,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, get_xattr: err = -ENODATA; /* == ENOATTR */ xattr = __get_xattr(ci, name); - if (!xattr) { - if (vxattr) - err = vxattr->getxattr_cb(ci, value, size); + if (!xattr) goto out; - } err = -ERANGE; if (size && size < xattr->val_len) -- cgit v1.1 From b65917dd2700b7d12e25e2e0bbfd58eb3c932158 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 20 Jan 2013 22:02:39 -0800 Subject: ceph: fix listxattr handling for vxattrs Only include vxattrs in the result if they are not hidden and exist (as determined by the exists_cb callback). Note that the buffer size we return when 0 is passed in always includes vxattrs that *might* exist, forming an upper bound. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 87b85f3..ec09ea5 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -657,23 +657,30 @@ list_xattr: vir_namelen = ceph_vxattrs_name_size(vxattrs); /* adding 1 byte per each variable due to the null termination */ - namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; + namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; err = -ERANGE; - if (size && namelen > size) + if (size && vir_namelen + namelen > size) goto out; - err = namelen; + err = namelen + vir_namelen; if (size == 0) goto out; names = __copy_xattr_names(ci, names); /* virtual xattr names, too */ - if (vxattrs) + err = namelen; + if (vxattrs) { for (i = 0; vxattrs[i].name; i++) { - len = sprintf(names, "%s", vxattrs[i].name); - names += len + 1; + if (!vxattrs[i].hidden && + !(vxattrs[i].exists_cb && + !vxattrs[i].exists_cb(ci))) { + len = sprintf(names, "%s", vxattrs[i].name); + names += len + 1; + err += len + 1; + } } + } out: spin_unlock(&ci->i_ceph_lock); -- cgit v1.1 From 32ab0bd78d7d9235efb38ad5cba6a3a6b39a1da6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 19 Jan 2013 16:46:32 -0800 Subject: ceph: change ceph.file.layout.* implementation, content Implement a new method to generate the ceph.file.layout vxattr using the new framework. Use 'stripe_unit' instead of 'chunk_size'. Include pool name, either as a string or as an integer. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 67 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index ec09ea5..532c95a 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -33,6 +33,50 @@ struct ceph_vxattr { bool (*exists_cb)(struct ceph_inode_info *ci); }; +/* layouts */ + +static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) +{ + size_t s; + char *p = (char *)&ci->i_layout; + + for (s = 0; s < sizeof(ci->i_layout); s++, p++) + if (*p) + return true; + return false; +} + +static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, + size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + pool_name); + else + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + (unsigned long long)pool); + + up_read(&osdc->map_sem); + return ret; +} + /* directories */ static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, @@ -84,6 +128,7 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, (long)ci->i_rctime.tv_nsec); } + #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name #define XATTR_NAME_CEPH(_type, _name) \ @@ -111,21 +156,15 @@ static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ /* files */ -static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, - size_t size) -{ - int ret; - - ret = snprintf(val, size, - "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n", - (unsigned long long)ceph_file_layout_su(ci->i_layout), - (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), - (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); - return ret; -} - static struct ceph_vxattr ceph_file_vxattrs[] = { - XATTR_NAME_CEPH(file, layout), + { + .name = "ceph.file.layout", + .name_size = sizeof("ceph.file.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, + }, { 0 } /* Required table terminator */ }; static size_t ceph_file_vxattrs_name_size; /* total size of all names */ -- cgit v1.1 From 1f08f2b056ea6c2e12f4e95e88949a882a996208 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 20 Jan 2013 22:07:12 -0800 Subject: ceph: add ceph.dir.layout vxattr This virtual xattr will only appear when there is a dir layout policy set on the directory. It can be set via setxattr and removed via removexattr (implemented by the MDS). Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 532c95a..f3c4fe7 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -142,6 +142,14 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, } static struct ceph_vxattr ceph_dir_vxattrs[] = { + { + .name = "ceph.dir.layout", + .name_size = sizeof("ceph.dir.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, + }, XATTR_NAME_CEPH(dir, entries), XATTR_NAME_CEPH(dir, files), XATTR_NAME_CEPH(dir, subdirs), -- cgit v1.1 From 695b711933689ea51af782760f4b1e2c6a42a631 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 20 Jan 2013 22:08:33 -0800 Subject: ceph: implement hidden per-field ceph.*.layout.* vxattrs Allow individual fields of the layout to be fetched via getxattr. The ceph.dir.layout.* vxattr with "disappear" if the exists_cb indicates there no dir layout set. Signed-off-by: Sage Weil Reviewed-by: Sam Lang --- fs/ceph/xattr.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index f3c4fe7..2135817 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -77,6 +77,46 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, return ret; } +static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, + char *val, size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, "%s", pool_name); + else + ret = snprintf(val, size, "%lld", (unsigned long long)pool); + up_read(&osdc->map_sem); + return ret; +} + /* directories */ static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, @@ -130,6 +170,8 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name +#define CEPH_XATTR_NAME2(_type, _name, _name2) \ + XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 #define XATTR_NAME_CEPH(_type, _name) \ { \ @@ -140,6 +182,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, .hidden = false, \ .exists_cb = NULL, \ } +#define XATTR_LAYOUT_FIELD(_type, _name, _field) \ + { \ + .name = CEPH_XATTR_NAME2(_type, _name, _field), \ + .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ + .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ + .readonly = false, \ + .hidden = true, \ + .exists_cb = ceph_vxattrcb_layout_exists, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { { @@ -150,6 +201,10 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { .hidden = false, .exists_cb = ceph_vxattrcb_layout_exists, }, + XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), + XATTR_LAYOUT_FIELD(dir, layout, stripe_count), + XATTR_LAYOUT_FIELD(dir, layout, object_size), + XATTR_LAYOUT_FIELD(dir, layout, pool), XATTR_NAME_CEPH(dir, entries), XATTR_NAME_CEPH(dir, files), XATTR_NAME_CEPH(dir, subdirs), @@ -173,6 +228,10 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { .hidden = false, .exists_cb = ceph_vxattrcb_layout_exists, }, + XATTR_LAYOUT_FIELD(file, layout, stripe_unit), + XATTR_LAYOUT_FIELD(file, layout, stripe_count), + XATTR_LAYOUT_FIELD(file, layout, object_size), + XATTR_LAYOUT_FIELD(file, layout, pool), { 0 } /* Required table terminator */ }; static size_t ceph_file_vxattrs_name_size; /* total size of all names */ -- cgit v1.1 From 87f979d390f9ecfa3d0038a9f9a002a62f8a1895 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "nofail" parameter There is only one caller of ceph_osdc_writepages(), and it always passes the value true as its "nofail" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with the constant value true. This and a number of cleanup patches that follow resolve: http://tracker.ceph.com/issues/4126 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 064d1a6..c7e401c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) page_off, len, ci->i_truncate_seq, ci->i_truncate_size, &inode->i_mtime, - &page, 1, 0, 0, true); + &page, 1, 0, 0); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); -- cgit v1.1 From fbf8685fb155e12a9f4d4b966c7b3442ed557687 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "dosync" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "dosync" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c7e401c..bef5528 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) page_off, len, ci->i_truncate_seq, ci->i_truncate_size, &inode->i_mtime, - &page, 1, 0, 0); + &page, 1, 0); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); -- cgit v1.1 From 2480882611e3ab844563dd3d0a822227604ab8fe Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "flags" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "flags" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index bef5528..8d3240d 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -492,8 +492,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) &ci->i_layout, snapc, page_off, len, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, - &page, 1, 0); + &inode->i_mtime, &page, 1); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); -- cgit v1.1 From a3bea47e8bdd51d921e5b2045720d60140612c7c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_new_request() "num_reply" parameter The "num_reply" parameter to ceph_osdc_new_request() is never used inside that function, so get rid of it. Note that ceph_sync_write() passes 2 for that argument, while all other callers pass 1. It doesn't matter, but perhaps someone should verify this doesn't indicate a problem. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 4 ++-- fs/ceph/file.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8d3240d..fc61371 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -315,7 +315,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, - NULL, false, 1, 0); + NULL, false, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -837,7 +837,7 @@ get_more_pages: snapc, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, true, 1, 0); + &inode->i_mtime, true, 0); if (IS_ERR(req)) { rc = PTR_ERR(req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a1e5b81..9c4325e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -541,7 +541,7 @@ more: ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2, page_align); + &mtime, false, page_align); if (IS_ERR(req)) return PTR_ERR(req); -- cgit v1.1 From 0eb40bf65e2fcc1c23ed7c9a10cd0890ee59e68f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: update ceph_mds_state_name() and ceph_mds_op_name() Update ceph_mds_state_name() and ceph_mds_op_name() to include the newly-added definitions in "ceph_fs.h", and to match its counterpart in the user space code. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/strings.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index cd5097d..89fa4a9 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c @@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s) case CEPH_MDS_STATE_BOOT: return "up:boot"; case CEPH_MDS_STATE_STANDBY: return "up:standby"; case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; + case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay"; case CEPH_MDS_STATE_CREATING: return "up:creating"; case CEPH_MDS_STATE_STARTING: return "up:starting"; /* up and in */ @@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op) case CEPH_MDS_OP_LOOKUP: return "lookup"; case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; + case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; case CEPH_MDS_OP_GETATTR: return "getattr"; case CEPH_MDS_OP_SETXATTR: return "setxattr"; case CEPH_MDS_OP_SETATTR: return "setattr"; case CEPH_MDS_OP_RMXATTR: return "rmxattr"; + case CEPH_MDS_OP_SETLAYOUT: return "setlayou"; + case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout"; case CEPH_MDS_OP_READDIR: return "readdir"; case CEPH_MDS_OP_MKNOD: return "mknod"; case CEPH_MDS_OP_LINK: return "link"; -- cgit v1.1 From 9e0eb85d5861d512759caf1301670b36d4c221ed Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Feb 2013 13:11:38 -0600 Subject: ceph: remove a few bogus declarations There are three ceph page vector functions declared in "fs/ceph/super.h" that don't belong there. They're probably left over from some long-ago code reorganization. They're properly declared in "include/linux/ceph/libceph.h" so just delete the ones in "super.h". This and the next few commits resolve: http://tracker.ceph.com/issues/4053 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/super.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 66ebe72..9861cce 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); /* file.c */ extern const struct file_operations ceph_file_fops; extern const struct address_space_operations ceph_aops; -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, - loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, - loff_t off, size_t len); -extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); + extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, -- cgit v1.1 From 92a49fb0f79f3300e6e50ddf56238e70678e4202 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 22 Feb 2013 15:31:00 -0800 Subject: ceph: fix statvfs fr_size Different versions of glibc are broken in different ways, but the short of it is that for the time being, frsize should == bsize, and be used as the multiple for the blocks, free, and available fields. This mirrors what is done for NFS. The previous reporting of the page size for frsize meant that newer glibc and df would report a very small value for the fs size. Fixes http://tracker.ceph.com/issues/3793. Signed-off-by: Sage Weil Reviewed-by: Greg Farnum --- fs/ceph/super.c | 7 ++++++- fs/ceph/super.h | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e86aa994..9fe17c6c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. + * + * NOTE: for the time being, we make bsize == frsize to humor + * not-yet-ancient versions of glibc that are broken. + * Someday, we will probably want to report a real block + * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); @@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 9861cce..604526a 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -21,7 +21,7 @@ /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ -#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ +#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ -- cgit v1.1 From 2c3dd4ff595e604cd4c4c51cff7a208f23148c2d Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 19 Feb 2013 12:25:56 -0600 Subject: ceph: eliminate sparse warnings in fs code Fix the causes for sparse warnings reported in the ceph file system code. Here there are only two (and they're sort of silly but they're easy to fix). This partially resolves: http://tracker.ceph.com/issues/4184 Reported-by: Fengguang Wu Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2135817..9b6b2b6d 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -213,7 +213,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, rsubdirs), XATTR_NAME_CEPH(dir, rbytes), XATTR_NAME_CEPH(dir, rctime), - { 0 } /* Required table terminator */ + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ @@ -232,7 +232,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { XATTR_LAYOUT_FIELD(file, layout, stripe_count), XATTR_LAYOUT_FIELD(file, layout, object_size), XATTR_LAYOUT_FIELD(file, layout, pool), - { 0 } /* Required table terminator */ + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_file_vxattrs_name_size; /* total size of all names */ -- cgit v1.1 From 12979354a1d6ef25d86f381e4d5f9e103f29913a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2013 09:15:10 -0800 Subject: libceph: rename ceph_pg -> ceph_pg_v1 Rename the old version this type to distinguish it from the new version. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 3b22150..e831436d 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) u64 len = 1, olen; u64 tmp; struct ceph_object_layout ol; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; int r; /* copy and validate */ -- cgit v1.1 From 5b191d9914eb68257f47de9d5bfe099b77f0687c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 23 Feb 2013 10:38:16 -0800 Subject: libceph: decode into cpu-native ceph_pg type Always decode data into our cpu-native ceph_pg type that has the correct field widths. Limit any remaining uses of ceph_pg_v1 to dealing with the legacy protocol. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index e831436d..fb036ed 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) u64 len = 1, olen; u64 tmp; struct ceph_object_layout ol; - struct ceph_pg_v1 pgid; + struct ceph_pg pgid; int r; /* copy and validate */ @@ -212,7 +212,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, osdc->osdmap); - pgid = ol.ol_pgid; + pgid.pool = le32_to_cpu(ol.ol_pgid.pool); + pgid.seed = le16_to_cpu(ol.ol_pgid.ps); dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { struct ceph_entity_addr *a = -- cgit v1.1 From 4f6a7e5ee1393ec4b243b39dac9f36992d161540 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 23 Feb 2013 10:41:09 -0800 Subject: ceph: update support for PGID64, PGPOOL3, OSDENC protocol features Support (and require) the PGID64, PGPOOL3, and OSDENC protocol features. These have been present in ceph.git since v0.42, Feb 2012. Require these features to simplify support; nobody is running older userspace. Note that the new request and reply encoding is still not in place, so the new code is not yet functional. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/mdsmap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 73b7d44..0d3c924 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) return ERR_PTR(-ENOMEM); ceph_decode_16_safe(p, end, version, bad); + if (version > 3) { + pr_warning("got mdsmap version %d > 3, failing", version); + goto bad; + } ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); m->m_epoch = ceph_decode_32(p); @@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) /* pg_pools */ ceph_decode_32_safe(p, end, n, bad); m->m_num_data_pg_pools = n; - m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); + m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); if (!m->m_data_pg_pools) goto badmem; - ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); + ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); for (i = 0; i < n; i++) - m->m_data_pg_pools[i] = ceph_decode_32(p); - m->m_cas_pg_pool = ceph_decode_32(p); + m->m_data_pg_pools[i] = ceph_decode_64(p); + m->m_cas_pg_pool = ceph_decode_64(p); /* ok, we don't care about the rest. */ dout("mdsmap_decode success epoch %u\n", m->m_epoch); -- cgit v1.1 From 2169aea649c08374bec7d220a3b8f64712275356 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Feb 2013 16:13:08 -0800 Subject: libceph: calculate placement based on the internal data types Instead of using the old ceph_object_layout struct, update our internal ceph_calc_object_layout method to use the ceph_pg type. This allows us to pass the full 32-bit precision of the pgid.seed to the callers. It also allows some callers to avoid reaching into the request structures for the struct ceph_object_layout fields. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index fb036ed..7d85991f 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; u64 len = 1, olen; u64 tmp; - struct ceph_object_layout ol; struct ceph_pg pgid; int r; @@ -209,11 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, + ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout, osdc->osdmap); - pgid.pool = le32_to_cpu(ol.ol_pgid.pool); - pgid.seed = le16_to_cpu(ol.ol_pgid.ps); dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { struct ceph_entity_addr *a = -- cgit v1.1 From 1b83bef24c6746a146d39915a18fb5425f2facb0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Feb 2013 16:11:12 -0800 Subject: libceph: update osd request/reply encoding Use the new version of the encoding for osd requests and replies. In the process, update the way we are tracking request ops and reply lengths and results in the struct ceph_osd_request. Update the rbd and fs/ceph users appropriately. The main changes are: - we keep pointers into the request memory for fields we need to update each time the request is sent out over the wire - we keep information about the result in an array in the request struct where the users can easily get at it. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/addr.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index fc61371..cfef3e0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page) static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - int rc, bytes; + int rc = req->r_result; + int bytes = le32_to_cpu(msg->hdr.data_len); int i; - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - rc = le32_to_cpu(replyhead->result); - bytes = le32_to_cpu(msg->hdr.data_len); - dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); /* unlock all pages, zeroing any data we didn't read */ @@ -553,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; struct ceph_inode_info *ci = ceph_inode(inode); unsigned wrote; struct page *page; int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - __s32 rc = -EIO; - u64 bytes = 0; + int rc = req->r_result; + u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); long writeback_stat; unsigned issued = ceph_caps_issued(ci); - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - if (rc >= 0) { /* * Assume we wrote the pages we originally sent. The @@ -740,8 +725,6 @@ retry: struct page *page; int want; u64 offset, len; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_op *op; long writeback_stat; next = 0; @@ -905,10 +888,8 @@ get_more_pages: /* revise final length, page count */ req->r_num_pages = locked_pages; - reqhead = req->r_request->front.iov_base; - op = (void *)(reqhead + 1); - op->extent.length = cpu_to_le64(len); - op->payload_len = cpu_to_le32(len); + req->r_request_ops[0].extent.length = cpu_to_le64(len); + req->r_request_ops[0].payload_len = cpu_to_le32(len); req->r_request->hdr.data_len = cpu_to_le32(len); rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); -- cgit v1.1