summaryrefslogtreecommitdiffstats
path: root/fs/ceph/export.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 11:09:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 11:09:13 -0700
commit240cd6a817bd855e3f1e615ed9ae16407f8cfce6 (patch)
treeda7d6267d549cd0fbdff3f30032720b416d1ff3d /fs/ceph/export.c
parent3021112598d2b722eee54d8a662fea2089abbdbc (diff)
parenta30be7cb2ccb995ad5e67fd4b548f11fe37fc8b1 (diff)
downloadop-kernel-dev-240cd6a817bd855e3f1e615ed9ae16407f8cfce6.zip
op-kernel-dev-240cd6a817bd855e3f1e615ed9ae16407f8cfce6.tar.gz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil: "The biggest chunk is a series of patches from Ilya that add support for new Ceph osd and crush map features, including some new tunables, primary affinity, and the new encoding that is needed for erasure coding support. This brings things into parity with the server side and the looming firefly release. There is also support for allocation hints in RBD that help limit fragmentation on the server side. There is also a series of patches from Zheng fixing NFS reexport, directory fragmentation support, flock vs fnctl behavior, and some issues with clustered MDS. Finally, there are some miscellaneous fixes from Yunchuan Wen for fscache, Fabian Frederick for ACLs, and from me for fsync(dirfd) behavior" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (79 commits) ceph: skip invalid dentry during dcache readdir libceph: dump pool {read,write}_tier to debugfs libceph: output primary affinity values on osdmap updates ceph: flush cap release queue when trimming session caps ceph: don't grabs open file reference for aborted request ceph: drop extra open file reference in ceph_atomic_open() ceph: preallocate buffer for readdir reply libceph: enable PRIMARY_AFFINITY feature bit libceph: redo ceph_calc_pg_primary() in terms of ceph_calc_pg_acting() libceph: add support for osd primary affinity libceph: add support for primary_temp mappings libceph: return primary from ceph_calc_pg_acting() libceph: switch ceph_calc_pg_acting() to new helpers libceph: introduce apply_temps() helper libceph: introduce pg_to_raw_osds() and raw_to_up_osds() helpers libceph: ceph_can_shift_osds(pool) and pool type defines libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions libceph: enable OSDMAP_ENC feature bit libceph: primary_affinity decode bits libceph: primary_affinity infrastructure ...
Diffstat (limited to 'fs/ceph/export.c')
-rw-r--r--fs/ceph/export.c267
1 files changed, 120 insertions, 147 deletions
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 16796be..00d6af6 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -8,23 +8,6 @@
#include "mds_client.h"
/*
- * NFS export support
- *
- * NFS re-export of a ceph mount is, at present, only semireliable.
- * The basic issue is that the Ceph architectures doesn't lend itself
- * well to generating filehandles that will remain valid forever.
- *
- * So, we do our best. If you're lucky, your inode will be in the
- * client's cache. If it's not, and you have a connectable fh, then
- * the MDS server may be able to find it for you. Otherwise, you get
- * ESTALE.
- *
- * There are ways to this more reliable, but in the non-connectable fh
- * case, we won't every work perfectly, and in the connectable case,
- * some changes are needed on the MDS side to work better.
- */
-
-/*
* Basic fh
*/
struct ceph_nfs_fh {
@@ -32,22 +15,12 @@ struct ceph_nfs_fh {
} __attribute__ ((packed));
/*
- * Larger 'connectable' fh that includes parent ino and name hash.
- * Use this whenever possible, as it works more reliably.
+ * Larger fh that includes parent ino.
*/
struct ceph_nfs_confh {
u64 ino, parent_ino;
- u32 parent_name_hash;
} __attribute__ ((packed));
-/*
- * The presence of @parent_inode here tells us whether NFS wants a
- * connectable file handle. However, we want to make a connectionable
- * file handle unconditionally so that the MDS gets as much of a hint
- * as possible. That means we only use @parent_dentry to indicate
- * whether nfsd wants a connectable fh, and whether we should indicate
- * failure from a too-small @max_len.
- */
static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
struct inode *parent_inode)
{
@@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
struct ceph_nfs_confh *cfh = (void *)rawfh;
int connected_handle_length = sizeof(*cfh)/4;
int handle_length = sizeof(*fh)/4;
- struct dentry *dentry;
- struct dentry *parent;
/* don't re-export snaps */
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EINVAL;
- dentry = d_find_alias(inode);
+ if (parent_inode && (*max_len < connected_handle_length)) {
+ *max_len = connected_handle_length;
+ return FILEID_INVALID;
+ } else if (*max_len < handle_length) {
+ *max_len = handle_length;
+ return FILEID_INVALID;
+ }
- /* if we found an alias, generate a connectable fh */
- if (*max_len >= connected_handle_length && dentry) {
- dout("encode_fh %p connectable\n", dentry);
- spin_lock(&dentry->d_lock);
- parent = dentry->d_parent;
+ if (parent_inode) {
+ dout("encode_fh %llx with parent %llx\n",
+ ceph_ino(inode), ceph_ino(parent_inode));
cfh->ino = ceph_ino(inode);
- cfh->parent_ino = ceph_ino(parent->d_inode);
- cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
- dentry);
+ cfh->parent_ino = ceph_ino(parent_inode);
*max_len = connected_handle_length;
- type = 2;
- spin_unlock(&dentry->d_lock);
- } else if (*max_len >= handle_length) {
- if (parent_inode) {
- /* nfsd wants connectable */
- *max_len = connected_handle_length;
- type = FILEID_INVALID;
- } else {
- dout("encode_fh %p\n", dentry);
- fh->ino = ceph_ino(inode);
- *max_len = handle_length;
- type = 1;
- }
+ type = FILEID_INO32_GEN_PARENT;
} else {
+ dout("encode_fh %llx\n", ceph_ino(inode));
+ fh->ino = ceph_ino(inode);
*max_len = handle_length;
- type = FILEID_INVALID;
+ type = FILEID_INO32_GEN;
}
- if (dentry)
- dput(dentry);
return type;
}
-/*
- * convert regular fh to dentry
- *
- * FIXME: we should try harder by querying the mds for the ino.
- */
-static struct dentry *__fh_to_dentry(struct super_block *sb,
- struct ceph_nfs_fh *fh, int fh_len)
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
@@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
struct ceph_vino vino;
int err;
- if (fh_len < sizeof(*fh) / 4)
- return ERR_PTR(-ESTALE);
-
- dout("__fh_to_dentry %llx\n", fh->ino);
- vino.ino = fh->ino;
+ vino.ino = ino;
vino.snap = CEPH_NOSNAP;
inode = ceph_find_inode(sb, vino);
if (!inode) {
@@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
dentry = d_obtain_alias(inode);
if (IS_ERR(dentry)) {
- pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
- fh->ino, inode);
iput(inode);
return dentry;
}
err = ceph_init_dentry(dentry);
if (err < 0) {
- iput(inode);
+ dput(dentry);
return ERR_PTR(err);
}
- dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry);
+ dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry);
return dentry;
}
/*
- * convert connectable fh to dentry
+ * convert regular fh to dentry
*/
-static struct dentry *__cfh_to_dentry(struct super_block *sb,
- struct ceph_nfs_confh *cfh, int fh_len)
+static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
+ struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct ceph_nfs_fh *fh = (void *)fid->raw;
+
+ if (fh_type != FILEID_INO32_GEN &&
+ fh_type != FILEID_INO32_GEN_PARENT)
+ return NULL;
+ if (fh_len < sizeof(*fh) / 4)
+ return NULL;
+
+ dout("fh_to_dentry %llx\n", fh->ino);
+ return __fh_to_dentry(sb, fh->ino);
+}
+
+static struct dentry *__get_parent(struct super_block *sb,
+ struct dentry *child, u64 ino)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct ceph_mds_request *req;
struct inode *inode;
struct dentry *dentry;
- struct ceph_vino vino;
int err;
- if (fh_len < sizeof(*cfh) / 4)
- return ERR_PTR(-ESTALE);
-
- dout("__cfh_to_dentry %llx (%llx/%x)\n",
- cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
-
- vino.ino = cfh->ino;
- vino.snap = CEPH_NOSNAP;
- inode = ceph_find_inode(sb, vino);
- if (!inode) {
- struct ceph_mds_request *req;
-
- req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
- USE_ANY_MDS);
- if (IS_ERR(req))
- return ERR_CAST(req);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
- req->r_ino1 = vino;
- req->r_ino2.ino = cfh->parent_ino;
- req->r_ino2.snap = CEPH_NOSNAP;
- req->r_path2 = kmalloc(16, GFP_NOFS);
- snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
- req->r_num_caps = 1;
- err = ceph_mdsc_do_request(mdsc, NULL, req);
- inode = req->r_target_inode;
- if (inode)
- ihold(inode);
- ceph_mdsc_put_request(req);
- if (!inode)
- return ERR_PTR(err ? err : -ESTALE);
+ if (child) {
+ req->r_inode = child->d_inode;
+ ihold(child->d_inode);
+ } else {
+ req->r_ino1 = (struct ceph_vino) {
+ .ino = ino,
+ .snap = CEPH_NOSNAP,
+ };
}
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ ihold(inode);
+ ceph_mdsc_put_request(req);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
dentry = d_obtain_alias(inode);
if (IS_ERR(dentry)) {
- pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
- cfh->ino, inode);
iput(inode);
return dentry;
}
err = ceph_init_dentry(dentry);
if (err < 0) {
- iput(inode);
+ dput(dentry);
return ERR_PTR(err);
}
- dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry);
+ dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
+ child ? ceph_ino(child->d_inode) : ino,
+ dentry, ceph_vinop(inode));
return dentry;
}
-static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
+struct dentry *ceph_get_parent(struct dentry *child)
{
- if (fh_type == 1)
- return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw,
- fh_len);
- else
- return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw,
- fh_len);
+ /* don't re-export snaps */
+ if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+ return ERR_PTR(-EINVAL);
+
+ dout("get_parent %p ino %llx.%llx\n",
+ child, ceph_vinop(child->d_inode));
+ return __get_parent(child->d_sb, child, 0);
}
/*
- * get parent, if possible.
- *
- * FIXME: we could do better by querying the mds to discover the
- * parent.
+ * convert regular fh to parent
*/
static struct dentry *ceph_fh_to_parent(struct super_block *sb,
- struct fid *fid,
+ struct fid *fid,
int fh_len, int fh_type)
{
struct ceph_nfs_confh *cfh = (void *)fid->raw;
- struct ceph_vino vino;
- struct inode *inode;
struct dentry *dentry;
- int err;
- if (fh_type == 1)
- return ERR_PTR(-ESTALE);
+ if (fh_type != FILEID_INO32_GEN_PARENT)
+ return NULL;
if (fh_len < sizeof(*cfh) / 4)
- return ERR_PTR(-ESTALE);
+ return NULL;
- pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
- cfh->parent_name_hash);
+ dout("fh_to_parent %llx\n", cfh->parent_ino);
+ dentry = __get_parent(sb, NULL, cfh->ino);
+ if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+ dentry = __fh_to_dentry(sb, cfh->parent_ino);
+ return dentry;
+}
- vino.ino = cfh->ino;
- vino.snap = CEPH_NOSNAP;
- inode = ceph_find_inode(sb, vino);
- if (!inode)
- return ERR_PTR(-ESTALE);
+static int ceph_get_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ struct ceph_mds_client *mdsc;
+ struct ceph_mds_request *req;
+ int err;
- dentry = d_obtain_alias(inode);
- if (IS_ERR(dentry)) {
- pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
- cfh->ino, inode);
- iput(inode);
- return dentry;
- }
- err = ceph_init_dentry(dentry);
- if (err < 0) {
- iput(inode);
- return ERR_PTR(err);
+ mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ mutex_lock(&parent->d_inode->i_mutex);
+
+ req->r_inode = child->d_inode;
+ ihold(child->d_inode);
+ req->r_ino2 = ceph_vino(parent->d_inode);
+ req->r_locked_dir = parent->d_inode;
+ req->r_num_caps = 2;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+
+ mutex_unlock(&parent->d_inode->i_mutex);
+
+ if (!err) {
+ struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+ memcpy(name, rinfo->dname, rinfo->dname_len);
+ name[rinfo->dname_len] = 0;
+ dout("get_name %p ino %llx.%llx name %s\n",
+ child, ceph_vinop(child->d_inode), name);
+ } else {
+ dout("get_name %p ino %llx.%llx err %d\n",
+ child, ceph_vinop(child->d_inode), err);
}
- dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry);
- return dentry;
+
+ ceph_mdsc_put_request(req);
+ return err;
}
const struct export_operations ceph_export_ops = {
.encode_fh = ceph_encode_fh,
.fh_to_dentry = ceph_fh_to_dentry,
.fh_to_parent = ceph_fh_to_parent,
+ .get_parent = ceph_get_parent,
+ .get_name = ceph_get_name,
};
OpenPOWER on IntegriCloud