summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-08-20 18:26:55 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-08-20 18:26:55 -0700
commit 0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad (patch)
tree f010b008554ceb5f649c735684bdab2f84f894c2
parent bfebeb16722d93caf7870b63aa7d094b6843479a (diff)
parent 0fcf6c02b205f80f24eb548b236543ec151cb01c (diff)
download op-kernel-dev-0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad.zip
op-kernel-dev-0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad.tar.gz
Merge tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
 "The main things are support for cephx v2 authentication protocol and
  basic support for rbd images within namespaces (myself). Also included
  are y2038 conversion patches from Arnd, a pile of miscellaneous fixes
  from Chengguang and Zheng's feature bit infrastructure for the
  filesystem"

* tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client: (40 commits)
  ceph: don't drop message if it contains more data than expected
  ceph: support cephfs' own feature bits
  crush: fix using plain integer as NULL warning
  libceph: remove unnecessary non NULL check for request_key
  ceph: refactor error handling code in ceph_reserve_caps()
  ceph: refactor ceph_unreserve_caps()
  ceph: change to void return type for __do_request()
  ceph: compare fsc->max_file_size and inode->i_size for max file size limit
  ceph: add additional size check in ceph_setattr()
  ceph: add additional offset check in ceph_write_iter()
  ceph: add additional range check in ceph_fallocate()
  ceph: add new field max_file_size in ceph_fs_client
  libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
  libceph: check authorizer reply/challenge length before reading
  libceph: implement CEPHX_V2 calculation mode
  libceph: add authorizer challenge
  libceph: factor out encrypt_authorizer()
  libceph: factor out __ceph_x_decrypt()
  libceph: factor out __prepare_write_connect()
  libceph: store ceph_auth_handshake pointer in ceph_connection
  ...
-rw-r--r-- drivers/block/rbd.c 125
-rw-r--r-- fs/ceph/acl.c 30
-rw-r--r-- fs/ceph/addr.c 74
-rw-r--r-- fs/ceph/cache.c 11
-rw-r--r-- fs/ceph/caps.c 138
-rw-r--r-- fs/ceph/dir.c 20
-rw-r--r-- fs/ceph/file.c 34
-rw-r--r-- fs/ceph/inode.c 83
-rw-r--r-- fs/ceph/mds_client.c 98
-rw-r--r-- fs/ceph/mds_client.h 14
-rw-r--r-- fs/ceph/quota.c 2
-rw-r--r-- fs/ceph/snap.c 6
-rw-r--r-- fs/ceph/super.c 6
-rw-r--r-- fs/ceph/super.h 12
-rw-r--r-- fs/ceph/xattr.c 4
-rw-r--r-- include/linux/ceph/auth.h 8
-rw-r--r-- include/linux/ceph/ceph_features.h 7
-rw-r--r-- include/linux/ceph/decode.h 18
-rw-r--r-- include/linux/ceph/messenger.h 8
-rw-r--r-- include/linux/ceph/msgr.h 2
-rw-r--r-- include/linux/ceph/osd_client.h 10
-rw-r--r-- include/linux/ceph/pagelist.h 2
-rw-r--r-- net/ceph/Kconfig 1
-rw-r--r-- net/ceph/Makefile 1
-rw-r--r-- net/ceph/auth.c 16
-rw-r--r-- net/ceph/auth_none.c 1
-rw-r--r-- net/ceph/auth_none.h 1
-rw-r--r-- net/ceph/auth_x.c 239
-rw-r--r-- net/ceph/auth_x.h 3
-rw-r--r-- net/ceph/auth_x_protocol.h 7
-rw-r--r-- net/ceph/ceph_common.c 13
-rw-r--r-- net/ceph/cls_lock_client.c 4
-rw-r--r-- net/ceph/crush/mapper.c 4
-rw-r--r-- net/ceph/messenger.c 113
-rw-r--r-- net/ceph/mon_client.c 2
-rw-r--r-- net/ceph/osd_client.c 27
-rw-r--r-- net/ceph/pagevec.c 1
37 files changed, 737 insertions, 408 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index d81c653..7915f3b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -181,6 +181,7 @@ struct rbd_image_header {
struct rbd_spec {
u64 pool_id;
const char *pool_name;
+ const char *pool_ns; /* NULL if default, never "" */
const char *image_id;
const char *image_name;
@@ -735,6 +736,7 @@ enum {
Opt_lock_timeout,
Opt_last_int,
/* int args above */
+ Opt_pool_ns,
Opt_last_string,
/* string args above */
Opt_read_only,
@@ -749,6 +751,7 @@ static match_table_t rbd_opts_tokens = {
{Opt_queue_depth, "queue_depth=%d"},
{Opt_lock_timeout, "lock_timeout=%d"},
/* int args above */
+ {Opt_pool_ns, "_pool_ns=%s"},
/* string args above */
{Opt_read_only, "read_only"},
{Opt_read_only, "ro"}, /* Alternate spelling */
@@ -776,9 +779,14 @@ struct rbd_options {
#define RBD_EXCLUSIVE_DEFAULT false
#define RBD_TRIM_DEFAULT true
+struct parse_rbd_opts_ctx {
+ struct rbd_spec *spec;
+ struct rbd_options *opts;
+};
+
static int parse_rbd_opts_token(char *c, void *private)
{
- struct rbd_options *rbd_opts = private;
+ struct parse_rbd_opts_ctx *pctx = private;
substring_t argstr[MAX_OPT_ARGS];
int token, intval, ret;
@@ -786,7 +794,7 @@ static int parse_rbd_opts_token(char *c, void *private)
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
- pr_err("bad mount option arg (not int) at '%s'\n", c);
+ pr_err("bad option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
@@ -802,7 +810,7 @@ static int parse_rbd_opts_token(char *c, void *private)
pr_err("queue_depth out of range\n");
return -EINVAL;
}
- rbd_opts->queue_depth = intval;
+ pctx->opts->queue_depth = intval;
break;
case Opt_lock_timeout:
/* 0 is "wait forever" (i.e. infinite timeout) */
@@ -810,22 +818,28 @@ static int parse_rbd_opts_token(char *c, void *private)
pr_err("lock_timeout out of range\n");
return -EINVAL;
}
- rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000);
+ pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000);
+ break;
+ case Opt_pool_ns:
+ kfree(pctx->spec->pool_ns);
+ pctx->spec->pool_ns = match_strdup(argstr);
+ if (!pctx->spec->pool_ns)
+ return -ENOMEM;
break;
case Opt_read_only:
- rbd_opts->read_only = true;
+ pctx->opts->read_only = true;
break;
case Opt_read_write:
- rbd_opts->read_only = false;
+ pctx->opts->read_only = false;
break;
case Opt_lock_on_read:
- rbd_opts->lock_on_read = true;
+ pctx->opts->lock_on_read = true;
break;
case Opt_exclusive:
- rbd_opts->exclusive = true;
+ pctx->opts->exclusive = true;
break;
case Opt_notrim:
- rbd_opts->trim = false;
+ pctx->opts->trim = false;
break;
default:
/* libceph prints "bad option" msg */
@@ -1452,7 +1466,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
- ktime_get_real_ts(&osd_req->r_mtime);
+ ktime_get_real_ts64(&osd_req->r_mtime);
osd_req->r_data_offset = obj_request->ex.oe_off;
}
@@ -1475,7 +1489,13 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
req->r_callback = rbd_osd_req_callback;
req->r_priv = obj_req;
+ /*
+ * Data objects may be stored in a separate pool, but always in
+ * the same namespace in that pool as the header in its pool.
+ */
+ ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
req->r_base_oloc.pool = rbd_dev->layout.pool_id;
+
if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
goto err_req;
@@ -4119,6 +4139,14 @@ static ssize_t rbd_pool_id_show(struct device *dev,
(unsigned long long) rbd_dev->spec->pool_id);
}
+static ssize_t rbd_pool_ns_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: "");
+}
+
static ssize_t rbd_name_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -4217,6 +4245,7 @@ static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(pool_ns, 0444, rbd_pool_ns_show, NULL);
static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
@@ -4235,6 +4264,7 @@ static struct attribute *rbd_attrs[] = {
&dev_attr_config_info.attr,
&dev_attr_pool.attr,
&dev_attr_pool_id.attr,
+ &dev_attr_pool_ns.attr,
&dev_attr_name.attr,
&dev_attr_image_id.attr,
&dev_attr_current_snap.attr,
@@ -4295,6 +4325,7 @@ static void rbd_spec_free(struct kref *kref)
struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
kfree(spec->pool_name);
+ kfree(spec->pool_ns);
kfree(spec->image_id);
kfree(spec->image_name);
kfree(spec->snap_name);
@@ -4353,6 +4384,12 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
rbd_dev->header.data_pool_id = CEPH_NOPOOL;
ceph_oid_init(&rbd_dev->header_oid);
rbd_dev->header_oloc.pool = spec->pool_id;
+ if (spec->pool_ns) {
+ WARN_ON(!*spec->pool_ns);
+ rbd_dev->header_oloc.pool_ns =
+ ceph_find_or_create_string(spec->pool_ns,
+ strlen(spec->pool_ns));
+ }
mutex_init(&rbd_dev->watch_mutex);
rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
@@ -4633,6 +4670,17 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
parent_spec->pool_id = pool_id;
parent_spec->image_id = image_id;
parent_spec->snap_id = snap_id;
+
+ /* TODO: support cloning across namespaces */
+ if (rbd_dev->spec->pool_ns) {
+ parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns,
+ GFP_KERNEL);
+ if (!parent_spec->pool_ns) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ }
+
rbd_dev->parent_spec = parent_spec;
parent_spec = NULL; /* rbd_dev now owns this */
} else {
@@ -5146,8 +5194,7 @@ static int rbd_add_parse_args(const char *buf,
const char *mon_addrs;
char *snap_name;
size_t mon_addrs_size;
- struct rbd_spec *spec = NULL;
- struct rbd_options *rbd_opts = NULL;
+ struct parse_rbd_opts_ctx pctx = { 0 };
struct ceph_options *copts;
int ret;
@@ -5171,22 +5218,22 @@ static int rbd_add_parse_args(const char *buf,
goto out_err;
}
- spec = rbd_spec_alloc();
- if (!spec)
+ pctx.spec = rbd_spec_alloc();
+ if (!pctx.spec)
goto out_mem;
- spec->pool_name = dup_token(&buf, NULL);
- if (!spec->pool_name)
+ pctx.spec->pool_name = dup_token(&buf, NULL);
+ if (!pctx.spec->pool_name)
goto out_mem;
- if (!*spec->pool_name) {
+ if (!*pctx.spec->pool_name) {
rbd_warn(NULL, "no pool name provided");
goto out_err;
}
- spec->image_name = dup_token(&buf, NULL);
- if (!spec->image_name)
+ pctx.spec->image_name = dup_token(&buf, NULL);
+ if (!pctx.spec->image_name)
goto out_mem;
- if (!*spec->image_name) {
+ if (!*pctx.spec->image_name) {
rbd_warn(NULL, "no image name provided");
goto out_err;
}
@@ -5207,24 +5254,24 @@ static int rbd_add_parse_args(const char *buf,
if (!snap_name)
goto out_mem;
*(snap_name + len) = '\0';
- spec->snap_name = snap_name;
+ pctx.spec->snap_name = snap_name;
/* Initialize all rbd options to the defaults */
- rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
- if (!rbd_opts)
+ pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
+ if (!pctx.opts)
goto out_mem;
- rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
- rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
- rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
- rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
- rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
- rbd_opts->trim = RBD_TRIM_DEFAULT;
+ pctx.opts->read_only = RBD_READ_ONLY_DEFAULT;
+ pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
+ pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
+ pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
+ pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
+ pctx.opts->trim = RBD_TRIM_DEFAULT;
copts = ceph_parse_options(options, mon_addrs,
- mon_addrs + mon_addrs_size - 1,
- parse_rbd_opts_token, rbd_opts);
+ mon_addrs + mon_addrs_size - 1,
+ parse_rbd_opts_token, &pctx);
if (IS_ERR(copts)) {
ret = PTR_ERR(copts);
goto out_err;
@@ -5232,15 +5279,15 @@ static int rbd_add_parse_args(const char *buf,
kfree(options);
*ceph_opts = copts;
- *opts = rbd_opts;
- *rbd_spec = spec;
+ *opts = pctx.opts;
+ *rbd_spec = pctx.spec;
return 0;
out_mem:
ret = -ENOMEM;
out_err:
- kfree(rbd_opts);
- rbd_spec_put(spec);
+ kfree(pctx.opts);
+ rbd_spec_put(pctx.spec);
kfree(options);
return ret;
@@ -5586,8 +5633,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
ret = rbd_register_watch(rbd_dev);
if (ret) {
if (ret == -ENOENT)
- pr_info("image %s/%s does not exist\n",
+ pr_info("image %s/%s%s%s does not exist\n",
rbd_dev->spec->pool_name,
+ rbd_dev->spec->pool_ns ?: "",
+ rbd_dev->spec->pool_ns ? "/" : "",
rbd_dev->spec->image_name);
goto err_out_format;
}
@@ -5609,8 +5658,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
ret = rbd_spec_fill_names(rbd_dev);
if (ret) {
if (ret == -ENOENT)
- pr_info("snap %s/%s@%s does not exist\n",
+ pr_info("snap %s/%s%s%s@%s does not exist\n",
rbd_dev->spec->pool_name,
+ rbd_dev->spec->pool_ns ?: "",
+ rbd_dev->spec->pool_ns ? "/" : "",
rbd_dev->spec->image_name,
rbd_dev->spec->snap_name);
goto err_out_probe;
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 59cb307..027408d 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -45,6 +45,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
struct posix_acl *ceph_get_acl(struct inode *inode, int type)
{
int size;
+ unsigned int retry_cnt = 0;
const char *name;
char *value = NULL;
struct posix_acl *acl;
@@ -60,6 +61,7 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
BUG();
}
+retry:
size = __ceph_getxattr(inode, name, "", 0);
if (size > 0) {
value = kzalloc(size, GFP_NOFS);
@@ -68,12 +70,22 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
size = __ceph_getxattr(inode, name, value, size);
}
- if (size > 0)
+ if (size == -ERANGE && retry_cnt < 10) {
+ retry_cnt++;
+ kfree(value);
+ value = NULL;
+ goto retry;
+ }
+
+ if (size > 0) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
- else if (size == -ERANGE || size == -ENODATA || size == 0)
+ } else if (size == -ENODATA || size == 0) {
acl = NULL;
- else
+ } else {
+ pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n",
+ ceph_vinop(inode), size);
acl = ERR_PTR(-EIO);
+ }
kfree(value);
@@ -89,6 +101,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
const char *name = NULL;
char *value = NULL;
struct iattr newattrs;
+ struct timespec64 old_ctime = inode->i_ctime;
umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
switch (type) {
@@ -133,7 +146,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (new_mode != old_mode) {
newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
- newattrs.ia_valid = ATTR_MODE;
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
ret = __ceph_setattr(inode, &newattrs);
if (ret)
goto out_free;
@@ -142,8 +155,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
ret = __ceph_setxattr(inode, name, value, size, 0);
if (ret) {
if (new_mode != old_mode) {
+ newattrs.ia_ctime = old_ctime;
newattrs.ia_mode = old_mode;
- newattrs.ia_valid = ATTR_MODE;
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
__ceph_setattr(inode, &newattrs);
}
goto out_free;
@@ -171,10 +185,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
return err;
if (acl) {
- int ret = posix_acl_equiv_mode(acl, mode);
- if (ret < 0)
+ err = posix_acl_equiv_mode(acl, mode);
+ if (err < 0)
goto out_err;
- if (ret == 0) {
+ if (err == 0) {
posix_acl_release(acl);
acl = NULL;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 292b3d7..9c332a6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode,
*/
static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
{
- struct timespec ts;
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_fs_client *fsc;
@@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
set_page_writeback(page);
- ts = timespec64_to_timespec(inode->i_mtime);
err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, snapc, page_off, len,
ceph_wbc.truncate_seq,
ceph_wbc.truncate_size,
- &ts, &page, 1);
+ &inode->i_mtime, &page, 1);
if (err < 0) {
struct writeback_control tmp_wbc;
if (!wbc)
@@ -1134,7 +1132,7 @@ new_request:
pages = NULL;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
BUG_ON(rc);
req = NULL;
@@ -1431,7 +1429,7 @@ static void ceph_restore_sigs(sigset_t *oldset)
/*
* vm ops
*/
-static int ceph_filemap_fault(struct vm_fault *vmf)
+static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
@@ -1439,8 +1437,9 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
loff_t off = vmf->pgoff << PAGE_SHIFT;
- int want, got, ret;
+ int want, got, err;
sigset_t oldset;
+ vm_fault_t ret = VM_FAULT_SIGBUS;
ceph_block_sigs(&oldset);
@@ -1452,8 +1451,8 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
want = CEPH_CAP_FILE_CACHE;
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
- if (ret < 0)
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
+ if (err < 0)
goto out_restore;
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
@@ -1465,16 +1464,17 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
ceph_del_rw_context(fi, &rw_ctx);
+ dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
+ inode, off, (size_t)PAGE_SIZE,
+ ceph_cap_string(got), ret);
} else
- ret = -EAGAIN;
+ err = -EAGAIN;
- dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
- inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret);
if (pinned_page)
put_page(pinned_page);
ceph_put_cap_refs(ci, got);
- if (ret != -EAGAIN)
+ if (err != -EAGAIN)
goto out_restore;
/* read inline data */
@@ -1482,7 +1482,6 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
/* does not support inline data > PAGE_SIZE */
ret = VM_FAULT_SIGBUS;
} else {
- int ret1;
struct address_space *mapping = inode->i_mapping;
struct page *page = find_or_create_page(mapping, 0,
mapping_gfp_constraint(mapping,
@@ -1491,32 +1490,32 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
ret = VM_FAULT_OOM;
goto out_inline;
}
- ret1 = __ceph_do_getattr(inode, page,
+ err = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, true);
- if (ret1 < 0 || off >= i_size_read(inode)) {
+ if (err < 0 || off >= i_size_read(inode)) {
unlock_page(page);
put_page(page);
- if (ret1 < 0)
- ret = ret1;
+ if (err == -ENOMEM)
+ ret = VM_FAULT_OOM;
else
ret = VM_FAULT_SIGBUS;
goto out_inline;
}
- if (ret1 < PAGE_SIZE)
- zero_user_segment(page, ret1, PAGE_SIZE);
+ if (err < PAGE_SIZE)
+ zero_user_segment(page, err, PAGE_SIZE);
else
flush_dcache_page(page);
SetPageUptodate(page);
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
- dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
+ dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
inode, off, (size_t)PAGE_SIZE, ret);
}
out_restore:
ceph_restore_sigs(&oldset);
- if (ret < 0)
- ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ if (err < 0)
+ ret = vmf_error(err);
return ret;
}
@@ -1524,7 +1523,7 @@ out_restore:
/*
* Reuse write_begin here for simplicity.
*/
-static int ceph_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
@@ -1535,8 +1534,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
loff_t off = page_offset(page);
loff_t size = i_size_read(inode);
size_t len;
- int want, got, ret;
+ int want, got, err;
sigset_t oldset;
+ vm_fault_t ret = VM_FAULT_SIGBUS;
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
@@ -1550,10 +1550,10 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
lock_page(page);
locked_page = page;
}
- ret = ceph_uninline_data(vma->vm_file, locked_page);
+ err = ceph_uninline_data(vma->vm_file, locked_page);
if (locked_page)
unlock_page(locked_page);
- if (ret < 0)
+ if (err < 0)
goto out_free;
}
@@ -1570,9 +1570,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
&got, NULL);
- if (ret < 0)
+ if (err < 0)
goto out_free;
dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
@@ -1590,13 +1590,13 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
break;
}
- ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
- if (ret >= 0) {
+ err = ceph_update_writeable_page(vma->vm_file, off, len, page);
+ if (err >= 0) {
/* success. we'll keep the page locked. */
set_page_dirty(page);
ret = VM_FAULT_LOCKED;
}
- } while (ret == -EAGAIN);
+ } while (err == -EAGAIN);
if (ret == VM_FAULT_LOCKED ||
ci->i_inline_version != CEPH_INLINE_NONE) {
@@ -1610,14 +1610,14 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
__mark_inode_dirty(inode, dirty);
}
- dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
+ dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n",
inode, off, len, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
out_free:
ceph_restore_sigs(&oldset);
ceph_free_cap_flush(prealloc_cf);
- if (ret < 0)
- ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ if (err < 0)
+ ret = vmf_error(err);
return ret;
}
@@ -1734,7 +1734,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
goto out;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1776,7 +1776,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
goto out_put;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1937,7 +1937,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
0, false, true);
err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
- wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime);
+ wr_req->r_mtime = ci->vfs_inode.i_mtime;
err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
if (!err)
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 362900e..1bf3502 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -25,8 +25,9 @@
#include "cache.h"
struct ceph_aux_inode {
- u64 version;
- struct timespec mtime;
+ u64 version;
+ u64 mtime_sec;
+ u64 mtime_nsec;
};
struct fscache_netfs ceph_cache_netfs = {
@@ -130,7 +131,8 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
- aux.mtime = timespec64_to_timespec(inode->i_mtime);
+ aux.mtime_sec = inode->i_mtime.tv_sec;
+ aux.mtime_nsec = inode->i_mtime.tv_nsec;
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
@@ -163,7 +165,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
if (!ci->fscache) {
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
- aux.mtime = timespec64_to_timespec(inode->i_mtime);
+ aux.mtime_sec = inode->i_mtime.tv_sec;
+ aux.mtime_nsec = inode->i_mtime.tv_nsec;
ci->fscache = fscache_acquire_cookie(fsc->fscache,
&ceph_fscache_inode_object_def,
&ci->i_vino, sizeof(ci->i_vino),
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 990258c..dd7dfdd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -156,6 +156,37 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
spin_unlock(&mdsc->caps_list_lock);
}
+static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps)
+{
+ struct ceph_cap *cap;
+ int i;
+
+ if (nr_caps) {
+ BUG_ON(mdsc->caps_reserve_count < nr_caps);
+ mdsc->caps_reserve_count -= nr_caps;
+ if (mdsc->caps_avail_count >=
+ mdsc->caps_reserve_count + mdsc->caps_min_count) {
+ mdsc->caps_total_count -= nr_caps;
+ for (i = 0; i < nr_caps; i++) {
+ cap = list_first_entry(&mdsc->caps_list,
+ struct ceph_cap, caps_item);
+ list_del(&cap->caps_item);
+ kmem_cache_free(ceph_cap_cachep, cap);
+ }
+ } else {
+ mdsc->caps_avail_count += nr_caps;
+ }
+
+ dout("%s: caps %d = %d used + %d resv + %d avail\n",
+ __func__,
+ mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ }
+}
+
/*
* Called under mdsc->mutex.
*/
@@ -167,6 +198,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
int have;
int alloc = 0;
int max_caps;
+ int err = 0;
bool trimmed = false;
struct ceph_mds_session *s;
LIST_HEAD(newcaps);
@@ -233,9 +265,14 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
ctx, need, have + alloc);
- goto out_nomem;
+ err = -ENOMEM;
+ break;
+ }
+
+ if (!err) {
+ BUG_ON(have + alloc != need);
+ ctx->count = need;
}
- BUG_ON(have + alloc != need);
spin_lock(&mdsc->caps_list_lock);
mdsc->caps_total_count += alloc;
@@ -245,77 +282,26 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
mdsc->caps_reserve_count +
mdsc->caps_avail_count);
+
+ if (err)
+ __ceph_unreserve_caps(mdsc, have + alloc);
+
spin_unlock(&mdsc->caps_list_lock);
- ctx->count = need;
dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
ctx, mdsc->caps_total_count, mdsc->caps_use_count,
mdsc->caps_reserve_count, mdsc->caps_avail_count);
- return 0;
-
-out_nomem:
-
- spin_lock(&mdsc->caps_list_lock);
- mdsc->caps_avail_count += have;
- mdsc->caps_reserve_count -= have;
-
- while (!list_empty(&newcaps)) {
- cap = list_first_entry(&newcaps,
- struct ceph_cap, caps_item);
- list_del(&cap->caps_item);
-
- /* Keep some preallocated caps around (ceph_min_count), to
- * avoid lots of free/alloc churn. */
- if (mdsc->caps_avail_count >=
- mdsc->caps_reserve_count + mdsc->caps_min_count) {
- kmem_cache_free(ceph_cap_cachep, cap);
- } else {
- mdsc->caps_avail_count++;
- mdsc->caps_total_count++;
- list_add(&cap->caps_item, &mdsc->caps_list);
- }
- }
-
- BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
- mdsc->caps_reserve_count +
- mdsc->caps_avail_count);
- spin_unlock(&mdsc->caps_list_lock);
- return -ENOMEM;
+ return err;
}
-int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx)
{
- int i;
- struct ceph_cap *cap;
-
dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
- if (ctx->count) {
- spin_lock(&mdsc->caps_list_lock);
- BUG_ON(mdsc->caps_reserve_count < ctx->count);
- mdsc->caps_reserve_count -= ctx->count;
- if (mdsc->caps_avail_count >=
- mdsc->caps_reserve_count + mdsc->caps_min_count) {
- mdsc->caps_total_count -= ctx->count;
- for (i = 0; i < ctx->count; i++) {
- cap = list_first_entry(&mdsc->caps_list,
- struct ceph_cap, caps_item);
- list_del(&cap->caps_item);
- kmem_cache_free(ceph_cap_cachep, cap);
- }
- } else {
- mdsc->caps_avail_count += ctx->count;
- }
- ctx->count = 0;
- dout("unreserve caps %d = %d used + %d resv + %d avail\n",
- mdsc->caps_total_count, mdsc->caps_use_count,
- mdsc->caps_reserve_count, mdsc->caps_avail_count);
- BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
- mdsc->caps_reserve_count +
- mdsc->caps_avail_count);
- spin_unlock(&mdsc->caps_list_lock);
- }
- return 0;
+ spin_lock(&mdsc->caps_list_lock);
+ __ceph_unreserve_caps(mdsc, ctx->count);
+ ctx->count = 0;
+ spin_unlock(&mdsc->caps_list_lock);
}
struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
@@ -1125,7 +1111,7 @@ struct cap_msg_args {
u64 flush_tid, oldest_flush_tid, size, max_size;
u64 xattr_version;
struct ceph_buffer *xattr_buf;
- struct timespec atime, mtime, ctime;
+ struct timespec64 atime, mtime, ctime;
int op, caps, wanted, dirty;
u32 seq, issue_seq, mseq, time_warp_seq;
u32 flags;
@@ -1146,7 +1132,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
struct ceph_msg *msg;
void *p;
size_t extra_len;
- struct timespec zerotime = {0};
+ struct timespec64 zerotime = {0};
struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
@@ -1186,9 +1172,9 @@ static int send_cap_msg(struct cap_msg_args *arg)
fc->size = cpu_to_le64(arg->size);
fc->max_size = cpu_to_le64(arg->max_size);
- ceph_encode_timespec(&fc->mtime, &arg->mtime);
- ceph_encode_timespec(&fc->atime, &arg->atime);
- ceph_encode_timespec(&fc->ctime, &arg->ctime);
+ ceph_encode_timespec64(&fc->mtime, &arg->mtime);
+ ceph_encode_timespec64(&fc->atime, &arg->atime);
+ ceph_encode_timespec64(&fc->ctime, &arg->ctime);
fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq);
fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid));
@@ -1237,7 +1223,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
* We just zero these out for now, as the MDS ignores them unless
* the requisite feature flags are set (which we don't do yet).
*/
- ceph_encode_timespec(p, &zerotime);
+ ceph_encode_timespec64(p, &zerotime);
p += sizeof(struct ceph_timespec);
ceph_encode_64(&p, 0);
@@ -1360,9 +1346,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
arg.xattr_buf = NULL;
}
- arg.mtime = timespec64_to_timespec(inode->i_mtime);
- arg.atime = timespec64_to_timespec(inode->i_atime);
- arg.ctime = timespec64_to_timespec(inode->i_ctime);
+ arg.mtime = inode->i_mtime;
+ arg.atime = inode->i_atime;
+ arg.ctime = inode->i_ctime;
arg.op = op;
arg.caps = cap->implemented;
@@ -3148,11 +3134,11 @@ static void handle_cap_grant(struct inode *inode,
}
if (newcaps & CEPH_CAP_ANY_RD) {
- struct timespec mtime, atime, ctime;
+ struct timespec64 mtime, atime, ctime;
/* ctime/mtime/atime? */
- ceph_decode_timespec(&mtime, &grant->mtime);
- ceph_decode_timespec(&atime, &grant->atime);
- ceph_decode_timespec(&ctime, &grant->ctime);
+ ceph_decode_timespec64(&mtime, &grant->mtime);
+ ceph_decode_timespec64(&atime, &grant->atime);
+ ceph_decode_timespec64(&ctime, &grant->ctime);
ceph_fill_file_time(inode, extra_info->issued,
le32_to_cpu(grant->time_warp_seq),
&ctime, &mtime, &atime);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 036ac0f..82928ce 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -827,12 +827,14 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
- if (ceph_quota_is_max_files_exceeded(dir))
- return -EDQUOT;
+ if (ceph_quota_is_max_files_exceeded(dir)) {
+ err = -EDQUOT;
+ goto out;
+ }
err = ceph_pre_init_acls(dir, &mode, &acls);
if (err < 0)
- return err;
+ goto out;
dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
dir, dentry, mode, rdev);
@@ -883,8 +885,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
- if (ceph_quota_is_max_files_exceeded(dir))
- return -EDQUOT;
+ if (ceph_quota_is_max_files_exceeded(dir)) {
+ err = -EDQUOT;
+ goto out;
+ }
dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
@@ -1393,7 +1397,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
" rfiles: %20lld\n"
" rsubdirs: %20lld\n"
"rbytes: %20lld\n"
- "rctime: %10ld.%09ld\n",
+ "rctime: %10lld.%09ld\n",
ci->i_files + ci->i_subdirs,
ci->i_files,
ci->i_subdirs,
@@ -1401,8 +1405,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
ci->i_rfiles,
ci->i_rsubdirs,
ci->i_rbytes,
- (long)ci->i_rctime.tv_sec,
- (long)ci->i_rctime.tv_nsec);
+ ci->i_rctime.tv_sec,
+ ci->i_rctime.tv_nsec);
}
if (*ppos >= dfi->dir_info_len)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e2679e8..92ab204 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -720,7 +720,7 @@ struct ceph_aio_request {
struct list_head osd_reqs;
unsigned num_reqs;
atomic_t pending_reqs;
- struct timespec mtime;
+ struct timespec64 mtime;
struct ceph_cap_flush *prealloc_cf;
};
@@ -922,7 +922,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
int num_pages = 0;
int flags;
int ret;
- struct timespec mtime = timespec64_to_timespec(current_time(inode));
+ struct timespec64 mtime = current_time(inode);
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos;
bool write = iov_iter_rw(iter) == WRITE;
@@ -1130,7 +1130,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
int flags;
int ret;
bool check_caps = false;
- struct timespec mtime = timespec64_to_timespec(current_time(inode));
+ struct timespec64 mtime = current_time(inode);
size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -1383,12 +1383,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_osd_client *osdc =
- &ceph_sb_to_client(inode->i_sb)->client->osdc;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
loff_t pos;
+ loff_t limit = max(i_size_read(inode), fsc->max_file_size);
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -1414,6 +1414,13 @@ retry_snap:
goto out;
pos = iocb->ki_pos;
+ if (unlikely(pos >= limit)) {
+ err = -EFBIG;
+ goto out;
+ } else {
+ iov_iter_truncate(from, limit - pos);
+ }
+
count = iov_iter_count(from);
if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) {
err = -EDQUOT;
@@ -1435,7 +1442,7 @@ retry_snap:
}
/* FIXME: not complete since it doesn't account for being at quota */
- if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
+ if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1525,7 +1532,7 @@ retry_snap:
}
if (written >= 0) {
- if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL))
+ if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
}
@@ -1546,6 +1553,7 @@ out_unlocked:
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
loff_t i_size;
loff_t ret;
@@ -1590,7 +1598,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
break;
}
- ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+ ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size));
out:
inode_unlock(inode);
@@ -1662,7 +1670,7 @@ static int ceph_zero_partial_object(struct inode *inode,
goto out;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!ret) {
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1727,8 +1735,7 @@ static long ceph_fallocate(struct file *file, int mode,
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_osd_client *osdc =
- &ceph_inode_to_client(inode)->client->osdc;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_cap_flush *prealloc_cf;
int want, got = 0;
int dirty;
@@ -1736,6 +1743,9 @@ static long ceph_fallocate(struct file *file, int mode,
loff_t endoff = 0;
loff_t size;
+ if ((offset + length) > max(i_size_read(inode), fsc->max_file_size))
+ return -EFBIG;
+
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
@@ -1759,7 +1769,7 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
- if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
+ if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) &&
!(mode & FALLOC_FL_PUNCH_HOLE)) {
ret = -ENOSPC;
goto unlock;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a866be9..ebc7bdae 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -658,13 +658,10 @@ int ceph_fill_file_size(struct inode *inode, int issued,
}
void ceph_fill_file_time(struct inode *inode, int issued,
- u64 time_warp_seq, struct timespec *ctime,
- struct timespec *mtime, struct timespec *atime)
+ u64 time_warp_seq, struct timespec64 *ctime,
+ struct timespec64 *mtime, struct timespec64 *atime)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- struct timespec64 ctime64 = timespec_to_timespec64(*ctime);
- struct timespec64 mtime64 = timespec_to_timespec64(*mtime);
- struct timespec64 atime64 = timespec_to_timespec64(*atime);
int warn = 0;
if (issued & (CEPH_CAP_FILE_EXCL|
@@ -673,39 +670,39 @@ void ceph_fill_file_time(struct inode *inode, int issued,
CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) {
if (ci->i_version == 0 ||
- timespec64_compare(&ctime64, &inode->i_ctime) > 0) {
+ timespec64_compare(ctime, &inode->i_ctime) > 0) {
dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
- (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- (long long)ctime->tv_sec, ctime->tv_nsec);
- inode->i_ctime = ctime64;
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ ctime->tv_sec, ctime->tv_nsec);
+ inode->i_ctime = *ctime;
}
if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
/* the MDS did a utimes() */
dout("mtime %lld.%09ld -> %lld.%09ld "
"tw %d -> %d\n",
- (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- (long long)mtime->tv_sec, mtime->tv_nsec,
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ mtime->tv_sec, mtime->tv_nsec,
ci->i_time_warp_seq, (int)time_warp_seq);
- inode->i_mtime = mtime64;
- inode->i_atime = atime64;
+ inode->i_mtime = *mtime;
+ inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
} else if (time_warp_seq == ci->i_time_warp_seq) {
/* nobody did utimes(); take the max */
- if (timespec64_compare(&mtime64, &inode->i_mtime) > 0) {
+ if (timespec64_compare(mtime, &inode->i_mtime) > 0) {
dout("mtime %lld.%09ld -> %lld.%09ld inc\n",
- (long long)inode->i_mtime.tv_sec,
+ inode->i_mtime.tv_sec,
inode->i_mtime.tv_nsec,
- (long long)mtime->tv_sec, mtime->tv_nsec);
- inode->i_mtime = mtime64;
+ mtime->tv_sec, mtime->tv_nsec);
+ inode->i_mtime = *mtime;
}
- if (timespec64_compare(&atime64, &inode->i_atime) > 0) {
+ if (timespec64_compare(atime, &inode->i_atime) > 0) {
dout("atime %lld.%09ld -> %lld.%09ld inc\n",
- (long long)inode->i_atime.tv_sec,
+ inode->i_atime.tv_sec,
inode->i_atime.tv_nsec,
- (long long)atime->tv_sec, atime->tv_nsec);
- inode->i_atime = atime64;
+ atime->tv_sec, atime->tv_nsec);
+ inode->i_atime = *atime;
}
} else if (issued & CEPH_CAP_FILE_EXCL) {
/* we did a utimes(); ignore mds values */
@@ -715,9 +712,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
} else {
/* we have no write|excl caps; whatever the MDS says is true */
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
- inode->i_ctime = ctime64;
- inode->i_mtime = mtime64;
- inode->i_atime = atime64;
+ inode->i_ctime = *ctime;
+ inode->i_mtime = *mtime;
+ inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
} else {
warn = 1;
@@ -743,7 +740,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_inode *info = iinfo->in;
struct ceph_inode_info *ci = ceph_inode(inode);
int issued, new_issued, info_caps;
- struct timespec mtime, atime, ctime;
+ struct timespec64 mtime, atime, ctime;
struct ceph_buffer *xattr_blob = NULL;
struct ceph_string *pool_ns = NULL;
struct ceph_cap *new_cap = NULL;
@@ -823,9 +820,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
/* be careful with mtime, atime, size */
- ceph_decode_timespec(&atime, &info->atime);
- ceph_decode_timespec(&mtime, &info->mtime);
- ceph_decode_timespec(&ctime, &info->ctime);
+ ceph_decode_timespec64(&atime, &info->atime);
+ ceph_decode_timespec64(&mtime, &info->mtime);
+ ceph_decode_timespec64(&ctime, &info->ctime);
ceph_fill_file_time(inode, issued,
le32_to_cpu(info->time_warp_seq),
&ctime, &mtime, &atime);
@@ -872,7 +869,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
ci->i_rbytes = le64_to_cpu(info->rbytes);
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
- ceph_decode_timespec(&ci->i_rctime, &info->rctime);
+ ceph_decode_timespec64(&ci->i_rctime, &info->rctime);
}
}
@@ -1954,7 +1951,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
int err = 0;
int inode_dirty_flags = 0;
bool lock_snap_rwsem = false;
- struct timespec ts;
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
@@ -2030,8 +2026,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
if (ia_valid & ATTR_ATIME) {
dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode,
- (long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
- (long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+ inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
+ attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_atime = attr->ia_atime;
@@ -2043,8 +2039,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
dirtied |= CEPH_CAP_FILE_WR;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
!timespec64_equal(&inode->i_atime, &attr->ia_atime)) {
- ts = timespec64_to_timespec(attr->ia_atime);
- ceph_encode_timespec(&req->r_args.setattr.atime, &ts);
+ ceph_encode_timespec64(&req->r_args.setattr.atime,
+ &attr->ia_atime);
mask |= CEPH_SETATTR_ATIME;
release |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
@@ -2052,8 +2048,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
}
if (ia_valid & ATTR_MTIME) {
dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
- (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- (long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_mtime = attr->ia_mtime;
@@ -2065,8 +2061,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
dirtied |= CEPH_CAP_FILE_WR;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
!timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) {
- ts = timespec64_to_timespec(attr->ia_mtime);
- ceph_encode_timespec(&req->r_args.setattr.mtime, &ts);
+ ceph_encode_timespec64(&req->r_args.setattr.mtime,
+ &attr->ia_mtime);
mask |= CEPH_SETATTR_MTIME;
release |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
@@ -2097,8 +2093,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
- (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- (long long)attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
if (only) {
/*
@@ -2140,7 +2136,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
- req->r_stamp = timespec64_to_timespec(attr->ia_ctime);
+ req->r_stamp = attr->ia_ctime;
err = ceph_mdsc_do_request(mdsc, NULL, req);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
@@ -2161,6 +2157,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int err;
if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -2171,6 +2168,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if ((attr->ia_valid & ATTR_SIZE) &&
+ attr->ia_size > max(inode->i_size, fsc->max_file_size))
+ return -EFBIG;
+
+ if ((attr->ia_valid & ATTR_SIZE) &&
ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
return -EDQUOT;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index dc8bc66..bc43c82 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -902,6 +902,27 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
return msg;
}
+/*
+ * Encode the cephfs feature bits this client supports at *p and advance
+ * *p past them: a 32-bit byte count followed by a bitmap of that many
+ * bytes, in which feature number n is bit (n % 8) of byte (n / 8).
+ * The bitmap size is computed from the last table entry (rounded up to
+ * a multiple of 8 bytes), so that entry is assumed to be the highest
+ * feature number.  BUG()s if the encoding would run past 'end'.
+ */
+static void encode_supported_features(void **p, void *end)
+{
+ static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED;
+ static const size_t count = ARRAY_SIZE(bits);
+
+ if (count > 0) {
+ size_t i;
+ size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8;
+
+ BUG_ON(*p + 4 + size > end);
+ ceph_encode_32(p, size);
+ memset(*p, 0, size);
+ /* index the byte by feature number, not by table position */
+ for (i = 0; i < count; i++)
+ ((unsigned char*)(*p))[bits[i] / 8] |= 1 << (bits[i] % 8);
+ *p += size;
+ } else {
+ /* no features: emit an empty bitmap (length 0) */
+ BUG_ON(*p + 4 > end);
+ ceph_encode_32(p, 0);
+ }
+}
+
/*
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
* to include additional client metadata fields.
@@ -911,11 +932,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
int i = -1;
- int metadata_bytes = 0;
+ int extra_bytes = 0;
int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options;
struct ceph_mount_options *fsopt = mdsc->fsc->mount_options;
- void *p;
+ void *p, *end;
const char* metadata[][2] = {
{"hostname", mdsc->nodename},
@@ -926,21 +947,26 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
};
/* Calculate serialized length of metadata */
- metadata_bytes = 4; /* map length */
+ extra_bytes = 4; /* map length */
for (i = 0; metadata[i][0]; ++i) {
- metadata_bytes += 8 + strlen(metadata[i][0]) +
+ extra_bytes += 8 + strlen(metadata[i][0]) +
strlen(metadata[i][1]);
metadata_key_count++;
}
+ /* supported feature */
+ extra_bytes += 4 + 8;
/* Allocate the message */
- msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes,
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
GFP_NOFS, false);
if (!msg) {
pr_err("create_session_msg ENOMEM creating msg\n");
return NULL;
}
- h = msg->front.iov_base;
+ p = msg->front.iov_base;
+ end = p + msg->front.iov_len;
+
+ h = p;
h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
h->seq = cpu_to_le64(seq);
@@ -950,11 +976,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
*
* ClientSession messages with metadata are v2
*/
- msg->hdr.version = cpu_to_le16(2);
+ msg->hdr.version = cpu_to_le16(3);
msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
- p = msg->front.iov_base + sizeof(*h);
+ p += sizeof(*h);
/* Number of entries in the map */
ceph_encode_32(&p, metadata_key_count);
@@ -972,6 +998,10 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
p += val_len;
}
+ encode_supported_features(&p, end);
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
return msg;
}
@@ -1779,6 +1809,7 @@ struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
{
struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
+ struct timespec64 ts;
if (!req)
return ERR_PTR(-ENOMEM);
@@ -1797,7 +1828,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
- req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
+ ktime_get_coarse_real_ts64(&ts);
+ req->r_stamp = timespec64_trunc(ts, mdsc->fsc->sb->s_time_gran);
req->r_op = op;
req->r_direct_mode = mode;
@@ -2094,7 +2126,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
/* time stamp */
{
struct ceph_timespec ts;
- ceph_encode_timespec(&ts, &req->r_stamp);
+ ceph_encode_timespec64(&ts, &req->r_stamp);
ceph_encode_copy(&p, &ts, sizeof(ts));
}
@@ -2187,7 +2219,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
p = msg->front.iov_base + req->r_request_release_offset;
{
struct ceph_timespec ts;
- ceph_encode_timespec(&ts, &req->r_stamp);
+ ceph_encode_timespec64(&ts, &req->r_stamp);
ceph_encode_copy(&p, &ts, sizeof(ts));
}
@@ -2225,7 +2257,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/*
* send request, or put it on the appropriate wait list.
*/
-static int __do_request(struct ceph_mds_client *mdsc,
+static void __do_request(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
struct ceph_mds_session *session = NULL;
@@ -2235,7 +2267,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
__unregister_request(mdsc, req);
- goto out;
+ return;
}
if (req->r_timeout &&
@@ -2258,7 +2290,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (mdsc->mdsmap->m_epoch == 0) {
dout("do_request no mdsmap, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map);
- goto finish;
+ return;
}
if (!(mdsc->fsc->mount_options->flags &
CEPH_MOUNT_OPT_MOUNTWAIT) &&
@@ -2276,7 +2308,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
dout("do_request no mds or not active, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map);
- goto out;
+ return;
}
/* get, open session */
@@ -2326,8 +2358,7 @@ finish:
complete_request(mdsc, req);
__unregister_request(mdsc, req);
}
-out:
- return err;
+ return;
}
/*
@@ -2748,7 +2779,7 @@ static void handle_session(struct ceph_mds_session *session,
int wake = 0;
/* decode */
- if (msg->front.iov_len != sizeof(*h))
+ if (msg->front.iov_len < sizeof(*h))
goto bad;
op = le32_to_cpu(h->op);
seq = le64_to_cpu(h->seq);
@@ -2958,15 +2989,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v2.flock_len = (__force __le32)
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
- struct timespec ts;
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v1.issued = cpu_to_le32(cap->issued);
rec.v1.size = cpu_to_le64(inode->i_size);
- ts = timespec64_to_timespec(inode->i_mtime);
- ceph_encode_timespec(&rec.v1.mtime, &ts);
- ts = timespec64_to_timespec(inode->i_atime);
- ceph_encode_timespec(&rec.v1.atime, &ts);
+ ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
+ ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
rec.v1.pathbase = cpu_to_le64(pathbase);
}
@@ -3378,10 +3406,10 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
seq = le32_to_cpu(h->seq);
- dname.name = (void *)h + sizeof(*h) + sizeof(u32);
- dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
- if (dname.len != get_unaligned_le32(h+1))
+ dname.len = get_unaligned_le32(h + 1);
+ if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len)
goto bad;
+ dname.name = (void *)(h + 1) + sizeof(u32);
/* lookup inode */
inode = ceph_find_inode(sb, vino);
@@ -3644,8 +3672,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_rwsem(&mdsc->pool_perm_rwsem);
mdsc->pool_perm_tree = RB_ROOT;
- strncpy(mdsc->nodename, utsname()->nodename,
- sizeof(mdsc->nodename) - 1);
+ strscpy(mdsc->nodename, utsname()->nodename,
+ sizeof(mdsc->nodename));
return 0;
}
@@ -4019,7 +4047,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
} else {
mdsc->mdsmap = newmap; /* first mds map */
}
- mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
+ mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size,
+ MAX_LFS_FILESIZE);
__wake_requests(mdsc, &mdsc->waiting_for_map);
ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
@@ -4155,6 +4184,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
+/*
+ * Connection callback: hand a challenge buffer received on this
+ * connection to the auth layer, together with the authorizer stored in
+ * the connection's MDS session.  Returns whatever
+ * ceph_auth_add_authorizer_challenge() returns.
+ */
+static int add_authorizer_challenge(struct ceph_connection *con,
+ void *challenge_buf, int challenge_buf_len)
+{
+ struct ceph_mds_session *session = con->private;
+ struct ceph_auth_client *auth_client;
+
+ auth_client = session->s_mdsc->fsc->client->monc.auth;
+
+ return ceph_auth_add_authorizer_challenge(auth_client,
+ session->s_auth.authorizer,
+ challenge_buf,
+ challenge_buf_len);
+}
static int verify_authorizer_reply(struct ceph_connection *con)
{
@@ -4218,6 +4257,7 @@ static const struct ceph_connection_operations mds_con_ops = {
.put = con_put,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
+ .add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 2ec3b5b..32fcce0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -16,6 +16,18 @@
#include <linux/ceph/mdsmap.h>
#include <linux/ceph/auth.h>
+/* The first 8 bits are reserved for old ceph releases */
+#define CEPHFS_FEATURE_MIMIC 8
+
+#define CEPHFS_FEATURES_ALL { \
+ 0, 1, 2, 3, 4, 5, 6, 7, \
+ CEPHFS_FEATURE_MIMIC, \
+}
+
+#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL
+#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+
+
/*
* Some lock dependencies:
*
@@ -229,7 +241,7 @@ struct ceph_mds_request {
int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid;
kgid_t r_gid;
- struct timespec r_stamp;
+ struct timespec64 r_stamp;
/* for choosing which mds to send this request to */
int r_direct_mode;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 242bfa5..32d4f13 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -48,7 +48,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct inode *inode;
struct ceph_inode_info *ci;
- if (msg->front.iov_len != sizeof(*h)) {
+ if (msg->front.iov_len < sizeof(*h)) {
pr_err("%s corrupt message mds%d len %d\n", __func__,
session->s_mds, (int)msg->front.iov_len);
ceph_msg_dump(msg);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index af81555..041c27e 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -594,9 +594,9 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
BUG_ON(capsnap->writing);
capsnap->size = inode->i_size;
- capsnap->mtime = timespec64_to_timespec(inode->i_mtime);
- capsnap->atime = timespec64_to_timespec(inode->i_atime);
- capsnap->ctime = timespec64_to_timespec(inode->i_ctime);
+ capsnap->mtime = inode->i_mtime;
+ capsnap->atime = inode->i_atime;
+ capsnap->ctime = inode->i_ctime;
capsnap->time_warp_seq = ci->i_time_warp_seq;
capsnap->truncate_size = ci->i_truncate_size;
capsnap->truncate_seq = ci->i_truncate_seq;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 95a3b3a..43ca3b7 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -219,8 +219,7 @@ static int parse_fsopt_token(char *c, void *private)
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
- pr_err("bad mount option arg (not int) "
- "at '%s'\n", c);
+ pr_err("bad option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
@@ -941,11 +940,12 @@ static int ceph_set_super(struct super_block *s, void *data)
dout("set_super %p data %p\n", s, data);
s->s_flags = fsc->mount_options->sb_flags;
- s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
+ s->s_maxbytes = MAX_LFS_FILESIZE;
s->s_xattr = ceph_xattr_handlers;
s->s_fs_info = fsc;
fsc->sb = s;
+ fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
s->s_op = &ceph_super_ops;
s->s_d_op = &ceph_dentry_ops;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 971328b..582e28f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -98,6 +98,7 @@ struct ceph_fs_client {
unsigned long mount_state;
int min_caps; /* min caps i added */
+ loff_t max_file_size;
struct ceph_mds_client *mdsc;
@@ -193,7 +194,7 @@ struct ceph_cap_snap {
u64 xattr_version;
u64 size;
- struct timespec mtime, atime, ctime;
+ struct timespec64 mtime, atime, ctime;
u64 time_warp_seq;
u64 truncate_size;
u32 truncate_seq;
@@ -307,7 +308,7 @@ struct ceph_inode_info {
char *i_symlink;
/* for dirs */
- struct timespec i_rctime;
+ struct timespec64 i_rctime;
u64 i_rbytes, i_rfiles, i_rsubdirs;
u64 i_files, i_subdirs;
@@ -655,7 +656,7 @@ extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx, int need);
-extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx);
extern void ceph_reservation_status(struct ceph_fs_client *client,
int *total, int *avail, int *used,
@@ -857,8 +858,9 @@ extern struct inode *ceph_get_snapdir(struct inode *parent);
extern int ceph_fill_file_size(struct inode *inode, int issued,
u32 truncate_seq, u64 truncate_size, u64 size);
extern void ceph_fill_file_time(struct inode *inode, int issued,
- u64 time_warp_seq, struct timespec *ctime,
- struct timespec *mtime, struct timespec *atime);
+ u64 time_warp_seq, struct timespec64 *ctime,
+ struct timespec64 *mtime,
+ struct timespec64 *atime);
extern int ceph_fill_trace(struct super_block *sb,
struct ceph_mds_request *req);
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5bc8edb..5cc8b94 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -224,8 +224,8 @@ static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
size_t size)
{
- return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
- (long)ci->i_rctime.tv_nsec);
+ /*
+ * Format ci->i_rctime as "<sec>.<nsec>" into val (at most size
+ * bytes) and return snprintf()'s result.  The nanosecond field
+ * is zero-padded to nine digits so the fraction reads correctly.
+ */
+ return snprintf(val, size, "%lld.%09ld", ci->i_rctime.tv_sec,
+ ci->i_rctime.tv_nsec);
}
/* quotas */
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index e931da8..6728c2e 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
/* ensure that an existing authorizer is up to date */
int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
struct ceph_auth_handshake *auth);
+ int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ void *challenge_buf,
+ int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ void *challenge_buf,
+ int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 3901927..6b92b33 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
-DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit*
+DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
+DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit*
-DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down!
DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal
DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
@@ -210,7 +210,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_SERVER_JEWEL | \
CEPH_FEATURE_MON_STATEFUL_SUB | \
CEPH_FEATURE_CRUSH_TUNABLES5 | \
- CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
+ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
+ CEPH_FEATURE_CEPHX_V2)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index d143ac8..a6c2a48 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -194,16 +194,22 @@ ceph_decode_skip_n(p, end, sizeof(u8), bad)
} while (0)
/*
- * struct ceph_timespec <-> struct timespec
+ * struct ceph_timespec <-> struct timespec64
*/
-static inline void ceph_decode_timespec(struct timespec *ts,
- const struct ceph_timespec *tv)
+static inline void ceph_decode_timespec64(struct timespec64 *ts,
+ const struct ceph_timespec *tv)
{
- ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
+ /*
+ * This will still overflow in year 2106. We could extend
+ * the protocol to steal two more bits from tv_nsec to
+ * add three more 136 year epochs after that the way ext4
+ * does if necessary.
+ */
+ ts->tv_sec = (time64_t)le32_to_cpu(tv->tv_sec);
ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
}
-static inline void ceph_encode_timespec(struct ceph_timespec *tv,
- const struct timespec *ts)
+static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
+ const struct timespec64 *ts)
{
tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c7dfcb8..fc2b449 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -31,6 +31,9 @@ struct ceph_connection_operations {
struct ceph_auth_handshake *(*get_authorizer) (
struct ceph_connection *con,
int *proto, int force_new);
+ int (*add_authorizer_challenge)(struct ceph_connection *con,
+ void *challenge_buf,
+ int challenge_buf_len);
int (*verify_authorizer_reply) (struct ceph_connection *con);
int (*invalidate_authorizer)(struct ceph_connection *con);
@@ -286,9 +289,8 @@ struct ceph_connection {
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
+ struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
- void *auth_reply_buf; /* where to put the authorizer reply */
- int auth_reply_buf_len;
struct mutex mutex;
@@ -330,7 +332,7 @@ struct ceph_connection {
int in_base_pos; /* bytes read */
__le64 in_temp_ack; /* for reading an ack */
- struct timespec last_keepalive_ack; /* keepalive2 ack stamp */
+ struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
struct delayed_work work; /* send|recv work */
unsigned long delay; /* current delay interval */
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 73ae2a9..9e50aed 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -91,7 +91,7 @@ struct ceph_entity_inst {
#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */
#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
-
+#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */
/*
* connection negotiation
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 0d6ee04..02096da 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -199,7 +199,7 @@ struct ceph_osd_request {
/* set by submitter */
u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */
struct ceph_snap_context *r_snapc; /* for writes */
- struct timespec r_mtime; /* ditto */
+ struct timespec64 r_mtime; /* ditto */
u64 r_data_offset; /* ditto */
bool r_linger; /* don't resend on failure */
@@ -253,7 +253,7 @@ struct ceph_osd_linger_request {
struct ceph_osd_request_target t;
u32 map_dne_bound;
- struct timespec mtime;
+ struct timespec64 mtime;
struct kref kref;
struct mutex lock;
@@ -508,7 +508,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
struct ceph_snap_context *sc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
- struct timespec *mtime,
+ struct timespec64 *mtime,
struct page **pages, int nr_pages);
/* watch/notify */
@@ -528,12 +528,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
u64 notify_id,
u64 cookie,
void *payload,
- size_t payload_len);
+ u32 payload_len);
int ceph_osdc_notify(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
void *payload,
- size_t payload_len,
+ u32 payload_len,
u32 timeout,
struct page ***preply_pages,
size_t *preply_len);
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 7edcded..d022336 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -68,7 +68,7 @@ static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
return ceph_pagelist_append(pl, &v, 1);
}
static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
- char *s, size_t len)
+ char *s, u32 len)
{
int ret = ceph_pagelist_encode_32(pl, len);
if (ret)
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index f8cceb9..cd2d5b9 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -41,4 +41,3 @@ config CEPH_LIB_USE_DNS_RESOLVER
Documentation/networking/dns_resolver.txt
If unsure, say N.
-
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 12bf497..db09defe 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -15,4 +15,3 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
auth_x.o \
ceph_fs.o ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o
-
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index dbde2b3..fbeee06 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
}
EXPORT_SYMBOL(ceph_auth_update_authorizer);
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ void *challenge_buf,
+ int challenge_buf_len)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->add_authorizer_challenge)
+ ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
+ challenge_buf_len);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
+
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
{
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 41d2a0c..edb7042 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -142,4 +142,3 @@ int ceph_auth_none_init(struct ceph_auth_client *ac)
ac->ops = &ceph_auth_none_ops;
return 0;
}
-
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 860ed98..4158f06 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -26,4 +26,3 @@ struct ceph_auth_none_info {
int ceph_auth_none_init(struct ceph_auth_client *ac);
#endif
-
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 2f4a1ba..b527323 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -9,6 +9,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
+#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
@@ -70,25 +71,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf,
return sizeof(u32) + ciphertext_len;
}
+static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p,
+ int ciphertext_len)
+{
+ struct ceph_x_encrypt_header *hdr = p;
+ int plaintext_len;
+ int ret;
+
+ ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len,
+ &plaintext_len);
+ if (ret)
+ return ret;
+
+ if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
+ pr_err("%s bad magic\n", __func__);
+ return -EINVAL;
+ }
+
+ return plaintext_len - sizeof(*hdr);
+}
+
static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
{
- struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
- int ciphertext_len, plaintext_len;
+ int ciphertext_len;
int ret;
ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
ceph_decode_need(p, end, ciphertext_len, e_inval);
- ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
- &plaintext_len);
- if (ret)
+ ret = __ceph_x_decrypt(secret, *p, ciphertext_len);
+ if (ret < 0)
return ret;
- if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
- return -EPERM;
-
*p += ciphertext_len;
- return plaintext_len - sizeof(struct ceph_x_encrypt_header);
+ return ret;
e_inval:
return -EINVAL;
@@ -149,12 +165,12 @@ static int process_one_ticket(struct ceph_auth_client *ac,
void *dp, *dend;
int dlen;
char is_enc;
- struct timespec validity;
+ struct timespec64 validity;
void *tp, *tpend;
void **ptp;
struct ceph_crypto_key new_session_key = { 0 };
struct ceph_buffer *new_ticket_blob;
- unsigned long new_expires, new_renew_after;
+ time64_t new_expires, new_renew_after;
u64 new_secret_id;
int ret;
@@ -189,11 +205,11 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
- ceph_decode_timespec(&validity, dp);
+ ceph_decode_timespec64(&validity, dp);
dp += sizeof(struct ceph_timespec);
- new_expires = get_seconds() + validity.tv_sec;
+ new_expires = ktime_get_real_seconds() + validity.tv_sec;
new_renew_after = new_expires - (validity.tv_sec / 4);
- dout(" expires=%lu renew_after=%lu\n", new_expires,
+ dout(" expires=%llu renew_after=%llu\n", new_expires,
new_renew_after);
/* ticket blob for service */
@@ -275,6 +291,51 @@ bad:
return -EINVAL;
}
+/*
+ * Encode and encrypt the second part (ceph_x_authorize_b) of the
+ * authorizer. The first part (ceph_x_authorize_a) should already be
+ * encoded.
+ */
+static int encrypt_authorizer(struct ceph_x_authorizer *au,
+ u64 *server_challenge)
+{
+ struct ceph_x_authorize_a *msg_a;
+ struct ceph_x_authorize_b *msg_b;
+ void *p, *end;
+ int ret;
+
+ msg_a = au->buf->vec.iov_base;
+ WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id));
+ p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len);
+ end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+
+ msg_b = p + ceph_x_encrypt_offset();
+ msg_b->struct_v = 2;
+ msg_b->nonce = cpu_to_le64(au->nonce);
+ if (server_challenge) {
+ msg_b->have_challenge = 1;
+ msg_b->server_challenge_plus_one =
+ cpu_to_le64(*server_challenge + 1);
+ } else {
+ msg_b->have_challenge = 0;
+ msg_b->server_challenge_plus_one = 0;
+ }
+
+ ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
+ if (ret < 0)
+ return ret;
+
+ p += ret;
+ if (server_challenge) {
+ WARN_ON(p != end);
+ } else {
+ WARN_ON(p > end);
+ au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+ }
+
+ return 0;
+}
+
static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
{
ceph_crypto_key_destroy(&au->session_key);
@@ -291,7 +352,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
int maxlen;
struct ceph_x_authorize_a *msg_a;
struct ceph_x_authorize_b *msg_b;
- void *p, *end;
int ret;
int ticket_blob_len =
(th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
@@ -335,21 +395,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
le64_to_cpu(msg_a->ticket_blob.secret_id));
- p = msg_a + 1;
- p += ticket_blob_len;
- end = au->buf->vec.iov_base + au->buf->vec.iov_len;
-
- msg_b = p + ceph_x_encrypt_offset();
- msg_b->struct_v = 1;
get_random_bytes(&au->nonce, sizeof(au->nonce));
- msg_b->nonce = cpu_to_le64(au->nonce);
- ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
- if (ret < 0)
+ ret = encrypt_authorizer(au, NULL);
+ if (ret) {
+ pr_err("failed to encrypt authorizer: %d", ret);
goto out_au;
+ }
- p += ret;
- WARN_ON(p > end);
- au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
(int)au->buf->vec.iov_len);
return 0;
@@ -385,13 +437,13 @@ static bool need_key(struct ceph_x_ticket_handler *th)
if (!th->have_key)
return true;
- return get_seconds() >= th->renew_after;
+ return ktime_get_real_seconds() >= th->renew_after;
}
static bool have_key(struct ceph_x_ticket_handler *th)
{
if (th->have_key) {
- if (get_seconds() >= th->expires)
+ if (ktime_get_real_seconds() >= th->expires)
th->have_key = false;
}
@@ -626,6 +678,54 @@ static int ceph_x_update_authorizer(
return 0;
}
+static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
+ void *challenge_buf,
+ int challenge_buf_len,
+ u64 *server_challenge)
+{
+ struct ceph_x_authorize_challenge *ch =
+ challenge_buf + sizeof(struct ceph_x_encrypt_header);
+ int ret;
+
+ /* no leading len */
+ ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
+ challenge_buf_len);
+ if (ret < 0)
+ return ret;
+ if (ret < sizeof(*ch)) {
+ pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
+ return -EINVAL;
+ }
+
+ *server_challenge = le64_to_cpu(ch->server_challenge);
+ return 0;
+}
+
+static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ void *challenge_buf,
+ int challenge_buf_len)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+ u64 server_challenge;
+ int ret;
+
+ ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
+ &server_challenge);
+ if (ret) {
+ pr_err("failed to decrypt authorize challenge: %d", ret);
+ return ret;
+ }
+
+ ret = encrypt_authorizer(au, &server_challenge);
+ if (ret) {
+ pr_err("failed to encrypt authorizer w/ challenge: %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
{
@@ -637,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
if (ret < 0)
return ret;
- if (ret != sizeof(*reply))
- return -EPERM;
+ if (ret < sizeof(*reply)) {
+ pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
+ return -EINVAL;
+ }
if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
ret = -EPERM;
@@ -704,26 +806,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
__le64 *psig)
{
void *enc_buf = au->enc_buf;
- struct {
- __le32 len;
- __le32 header_crc;
- __le32 front_crc;
- __le32 middle_crc;
- __le32 data_crc;
- } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
int ret;
- sigblock->len = cpu_to_le32(4*sizeof(u32));
- sigblock->header_crc = msg->hdr.crc;
- sigblock->front_crc = msg->footer.front_crc;
- sigblock->middle_crc = msg->footer.middle_crc;
- sigblock->data_crc = msg->footer.data_crc;
- ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
- sizeof(*sigblock));
- if (ret < 0)
- return ret;
+ if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) {
+ struct {
+ __le32 len;
+ __le32 header_crc;
+ __le32 front_crc;
+ __le32 middle_crc;
+ __le32 data_crc;
+ } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
+
+ sigblock->len = cpu_to_le32(4*sizeof(u32));
+ sigblock->header_crc = msg->hdr.crc;
+ sigblock->front_crc = msg->footer.front_crc;
+ sigblock->middle_crc = msg->footer.middle_crc;
+ sigblock->data_crc = msg->footer.data_crc;
+
+ ret = ceph_x_encrypt(&au->session_key, enc_buf,
+ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
+ if (ret < 0)
+ return ret;
+
+ *psig = *(__le64 *)(enc_buf + sizeof(u32));
+ } else {
+ struct {
+ __le32 header_crc;
+ __le32 front_crc;
+ __le32 front_len;
+ __le32 middle_crc;
+ __le32 middle_len;
+ __le32 data_crc;
+ __le32 data_len;
+ __le32 seq_lower_word;
+ } __packed *sigblock = enc_buf;
+ struct {
+ __le64 a, b, c, d;
+ } __packed *penc = enc_buf;
+ int ciphertext_len;
+
+ sigblock->header_crc = msg->hdr.crc;
+ sigblock->front_crc = msg->footer.front_crc;
+ sigblock->front_len = msg->hdr.front_len;
+ sigblock->middle_crc = msg->footer.middle_crc;
+ sigblock->middle_len = msg->hdr.middle_len;
+ sigblock->data_crc = msg->footer.data_crc;
+ sigblock->data_len = msg->hdr.data_len;
+ sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
+
+ /* no leading len, no ceph_x_encrypt_header */
+ ret = ceph_crypt(&au->session_key, true, enc_buf,
+ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
+ &ciphertext_len);
+ if (ret)
+ return ret;
+
+ *psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
+ }
- *psig = *(__le64 *)(enc_buf + sizeof(u32));
return 0;
}
@@ -778,6 +918,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.handle_reply = ceph_x_handle_reply,
.create_authorizer = ceph_x_create_authorizer,
.update_authorizer = ceph_x_update_authorizer,
+ .add_authorizer_challenge = ceph_x_add_authorizer_challenge,
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
.invalidate_authorizer = ceph_x_invalidate_authorizer,
.reset = ceph_x_reset,
@@ -823,5 +964,3 @@ out_nomem:
out:
return ret;
}
-
-
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 454cb54..c03735f 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -22,7 +22,7 @@ struct ceph_x_ticket_handler {
u64 secret_id;
struct ceph_buffer *ticket_blob;
- unsigned long renew_after, expires;
+ time64_t renew_after, expires;
};
#define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */
@@ -52,4 +52,3 @@ struct ceph_x_info {
int ceph_x_init(struct ceph_auth_client *ac);
#endif
-
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 32c13d7..24b0b74 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -70,6 +70,13 @@ struct ceph_x_authorize_a {
struct ceph_x_authorize_b {
__u8 struct_v;
__le64 nonce;
+ __u8 have_challenge;
+ __le64 server_challenge_plus_one;
+} __attribute__ ((packed));
+
+struct ceph_x_authorize_challenge {
+ __u8 struct_v;
+ __le64 server_challenge;
} __attribute__ ((packed));
struct ceph_x_authorize_reply {
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 584fdbe..87afb9e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -304,7 +304,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) {
struct ceph_crypto_key *ckey;
ukey = request_key(&key_type_ceph, name, NULL);
- if (!ukey || IS_ERR(ukey)) {
+ if (IS_ERR(ukey)) {
/* request_key errors don't map nicely to mount(2)
errors; don't even try, but still printk */
key_err = PTR_ERR(ukey);
@@ -379,7 +379,7 @@ ceph_parse_options(char *options, const char *dev_name,
/* parse mount options */
while ((c = strsep(&options, ",")) != NULL) {
- int token, intval, ret;
+ int token, intval;
if (!*c)
continue;
err = -EINVAL;
@@ -394,11 +394,10 @@ ceph_parse_options(char *options, const char *dev_name,
continue;
}
if (token < Opt_last_int) {
- ret = match_int(&argstr[0], &intval);
- if (ret < 0) {
- pr_err("bad mount option arg (not int) "
- "at '%s'\n", c);
- continue;
+ err = match_int(&argstr[0], &intval);
+ if (err < 0) {
+ pr_err("bad option arg (not int) at '%s'\n", c);
+ goto out;
}
dout("got int token %d val %d\n", token, intval);
} else if (token > Opt_last_int && token < Opt_last_string) {
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 8d2032b..2105a6e 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -32,7 +32,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
int desc_len = strlen(desc);
void *p, *end;
struct page *lock_op_page;
- struct timespec mtime;
+ struct timespec64 mtime;
int ret;
lock_op_buf_size = name_len + sizeof(__le32) +
@@ -63,7 +63,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
ceph_encode_string(&p, end, desc, desc_len);
/* only support infinite duration */
memset(&mtime, 0, sizeof(mtime));
- ceph_encode_timespec(p, &mtime);
+ ceph_encode_timespec64(p, &mtime);
p += sizeof(struct ceph_timespec);
ceph_encode_8(&p, flags);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 417df67..3f323ed 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -514,7 +514,7 @@ static int crush_choose_firstn(const struct crush_map *map,
in, work->work[-1-in->id],
x, r,
(choose_args ?
- &choose_args[-1-in->id] : 0),
+ &choose_args[-1-in->id] : NULL),
outpos);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
@@ -725,7 +725,7 @@ static void crush_choose_indep(const struct crush_map *map,
in, work->work[-1-in->id],
x, r,
(choose_args ?
- &choose_args[-1-in->id] : 0),
+ &choose_args[-1-in->id] : NULL),
outpos);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c6413c3..0a18719 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1417,11 +1417,11 @@ static void prepare_write_keepalive(struct ceph_connection *con)
dout("prepare_write_keepalive %p\n", con);
con_out_kvec_reset(con);
if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
- struct timespec now;
+ struct timespec64 now;
- ktime_get_real_ts(&now);
+ ktime_get_real_ts64(&now);
con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
- ceph_encode_timespec(&con->out_temp_keepalive2, &now);
+ ceph_encode_timespec64(&con->out_temp_keepalive2, &now);
con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
&con->out_temp_keepalive2);
} else {
@@ -1434,24 +1434,26 @@ static void prepare_write_keepalive(struct ceph_connection *con)
* Connection negotiation.
*/
-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
- int *auth_proto)
+static int get_connect_authorizer(struct ceph_connection *con)
{
struct ceph_auth_handshake *auth;
+ int auth_proto;
if (!con->ops->get_authorizer) {
+ con->auth = NULL;
con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
con->out_connect.authorizer_len = 0;
- return NULL;
+ return 0;
}
- auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
+ auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
if (IS_ERR(auth))
- return auth;
+ return PTR_ERR(auth);
- con->auth_reply_buf = auth->authorizer_reply_buf;
- con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
- return auth;
+ con->auth = auth;
+ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+ con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
+ return 0;
}
/*
@@ -1467,12 +1469,22 @@ static void prepare_write_banner(struct ceph_connection *con)
con_flag_set(con, CON_FLAG_WRITE_PENDING);
}
+static void __prepare_write_connect(struct ceph_connection *con)
+{
+ con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect);
+ if (con->auth)
+ con_out_kvec_add(con, con->auth->authorizer_buf_len,
+ con->auth->authorizer_buf);
+
+ con->out_more = 0;
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
static int prepare_write_connect(struct ceph_connection *con)
{
unsigned int global_seq = get_global_seq(con->msgr, 0);
int proto;
- int auth_proto;
- struct ceph_auth_handshake *auth;
+ int ret;
switch (con->peer_name.type) {
case CEPH_ENTITY_TYPE_MON:
@@ -1499,24 +1511,11 @@ static int prepare_write_connect(struct ceph_connection *con)
con->out_connect.protocol_version = cpu_to_le32(proto);
con->out_connect.flags = 0;
- auth_proto = CEPH_AUTH_UNKNOWN;
- auth = get_connect_authorizer(con, &auth_proto);
- if (IS_ERR(auth))
- return PTR_ERR(auth);
-
- con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
- con->out_connect.authorizer_len = auth ?
- cpu_to_le32(auth->authorizer_buf_len) : 0;
-
- con_out_kvec_add(con, sizeof (con->out_connect),
- &con->out_connect);
- if (auth && auth->authorizer_buf_len)
- con_out_kvec_add(con, auth->authorizer_buf_len,
- auth->authorizer_buf);
-
- con->out_more = 0;
- con_flag_set(con, CON_FLAG_WRITE_PENDING);
+ ret = get_connect_authorizer(con);
+ if (ret)
+ return ret;
+ __prepare_write_connect(con);
return 0;
}
@@ -1781,11 +1780,21 @@ static int read_partial_connect(struct ceph_connection *con)
if (ret <= 0)
goto out;
- size = le32_to_cpu(con->in_reply.authorizer_len);
- end += size;
- ret = read_partial(con, end, size, con->auth_reply_buf);
- if (ret <= 0)
- goto out;
+ if (con->auth) {
+ size = le32_to_cpu(con->in_reply.authorizer_len);
+ if (size > con->auth->authorizer_reply_buf_len) {
+ pr_err("authorizer reply too big: %d > %zu\n", size,
+ con->auth->authorizer_reply_buf_len);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ end += size;
+ ret = read_partial(con, end, size,
+ con->auth->authorizer_reply_buf);
+ if (ret <= 0)
+ goto out;
+ }
dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
con, (int)con->in_reply.tag,
@@ -1793,7 +1802,6 @@ static int read_partial_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.global_seq));
out:
return ret;
-
}
/*
@@ -2076,12 +2084,27 @@ static int process_connect(struct ceph_connection *con)
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
- if (con->auth_reply_buf) {
+ if (con->auth) {
/*
* Any connection that defines ->get_authorizer()
- * should also define ->verify_authorizer_reply().
+ * should also define ->add_authorizer_challenge() and
+ * ->verify_authorizer_reply().
+ *
* See get_connect_authorizer().
*/
+ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
+ ret = con->ops->add_authorizer_challenge(
+ con, con->auth->authorizer_reply_buf,
+ le32_to_cpu(con->in_reply.authorizer_len));
+ if (ret < 0)
+ return ret;
+
+ con_out_kvec_reset(con);
+ __prepare_write_connect(con);
+ prepare_read_connect(con);
+ return 0;
+ }
+
ret = con->ops->verify_authorizer_reply(con);
if (ret < 0) {
con->error_msg = "bad authorize reply";
@@ -2555,7 +2578,7 @@ static int read_keepalive_ack(struct ceph_connection *con)
int ret = read_partial(con, size, size, &ceph_ts);
if (ret <= 0)
return ret;
- ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
+ ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts);
prepare_read_tag(con);
return 1;
}
@@ -3223,12 +3246,12 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
{
if (interval > 0 &&
(con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
- struct timespec now;
- struct timespec ts;
- ktime_get_real_ts(&now);
- jiffies_to_timespec(interval, &ts);
- ts = timespec_add(con->last_keepalive_ack, ts);
- return timespec_compare(&now, &ts) >= 0;
+ struct timespec64 now;
+ struct timespec64 ts;
+ ktime_get_real_ts64(&now);
+ jiffies_to_timespec64(interval, &ts);
+ ts = timespec64_add(con->last_keepalive_ack, ts);
+ return timespec64_compare(&now, &ts) >= 0;
}
return false;
}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index d7a7a23..18deb3d 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1249,7 +1249,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
if (monc->client->extra_mon_dispatch &&
monc->client->extra_mon_dispatch(monc->client, msg) == 0)
break;
-
+
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a00c74f..60934bd 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1978,7 +1978,7 @@ static void encode_request_partial(struct ceph_osd_request *req,
p += sizeof(struct ceph_blkin_trace_info);
ceph_encode_32(&p, 0); /* client_inc, always 0 */
- ceph_encode_timespec(p, &req->r_mtime);
+ ceph_encode_timespec64(p, &req->r_mtime);
p += sizeof(struct ceph_timespec);
encode_oloc(&p, end, &req->r_t.target_oloc);
@@ -4512,7 +4512,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
ceph_oid_copy(&lreq->t.base_oid, oid);
ceph_oloc_copy(&lreq->t.base_oloc, oloc);
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
- ktime_get_real_ts(&lreq->mtime);
+ ktime_get_real_ts64(&lreq->mtime);
lreq->reg_req = alloc_linger_request(lreq);
if (!lreq->reg_req) {
@@ -4570,7 +4570,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
- ktime_get_real_ts(&req->r_mtime);
+ ktime_get_real_ts64(&req->r_mtime);
osd_req_op_watch_init(req, 0, lreq->linger_id,
CEPH_OSD_WATCH_OP_UNWATCH);
@@ -4591,7 +4591,7 @@ EXPORT_SYMBOL(ceph_osdc_unwatch);
static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which,
u64 notify_id, u64 cookie, void *payload,
- size_t payload_len)
+ u32 payload_len)
{
struct ceph_osd_req_op *op;
struct ceph_pagelist *pl;
@@ -4628,7 +4628,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
u64 notify_id,
u64 cookie,
void *payload,
- size_t payload_len)
+ u32 payload_len)
{
struct ceph_osd_request *req;
int ret;
@@ -4661,7 +4661,7 @@ EXPORT_SYMBOL(ceph_osdc_notify_ack);
static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
u64 cookie, u32 prot_ver, u32 timeout,
- void *payload, size_t payload_len)
+ void *payload, u32 payload_len)
{
struct ceph_osd_req_op *op;
struct ceph_pagelist *pl;
@@ -4701,7 +4701,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
void *payload,
- size_t payload_len,
+ u32 payload_len,
u32 timeout,
struct page ***preply_pages,
size_t *preply_len)
@@ -5136,7 +5136,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
struct ceph_snap_context *snapc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
- struct timespec *mtime,
+ struct timespec64 *mtime,
struct page **pages, int num_pages)
{
struct ceph_osd_request *req;
@@ -5393,6 +5393,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
+static int add_authorizer_challenge(struct ceph_connection *con,
+ void *challenge_buf, int challenge_buf_len)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_osd_client *osdc = o->o_osdc;
+ struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+ return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer,
+ challenge_buf, challenge_buf_len);
+}
static int verify_authorizer_reply(struct ceph_connection *con)
{
@@ -5442,6 +5452,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.put = put_osd_con,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
+ .add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index e560d39..d3736f5 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -197,4 +197,3 @@ void ceph_zero_page_vector_range(int off, int len, struct page **pages)
}
}
EXPORT_SYMBOL(ceph_zero_page_vector_range);
-
OpenPOWER on IntegriCloud