From 28c0254ede13ab575d2df5c6585ed3d4817c3e6b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 28 May 2012 14:44:30 +0800 Subject: ceph: check PG_Private flag before accessing page->private I got lots of NULL pointer dereference Oops when compiling kernel on ceph. The bug is because the kernel page migration routine replaces some pages in the page cache with new pages, these new pages' private can be non-zero. Signed-off-by: Zheng Yan Signed-off-by: Sage Weil --- fs/ceph/addr.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 173b1d2..8b67304 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -54,7 +54,12 @@ (CONGESTION_ON_THRESH(congestion_kb) - \ (CONGESTION_ON_THRESH(congestion_kb) >> 2)) - +static inline struct ceph_snap_context *page_snap_context(struct page *page) +{ + if (PagePrivate(page)) + return (void *)page->private; + return NULL; +} /* * Dirty a page. Optimistically adjust accounting, on the assumption @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) { struct inode *inode; struct ceph_inode_info *ci; - struct ceph_snap_context *snapc = (void *)page->private; + struct ceph_snap_context *snapc = page_snap_context(page); BUG_ON(!PageLocked(page)); - BUG_ON(!page->private); BUG_ON(!PagePrivate(page)); BUG_ON(!page->mapping); @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g) struct inode *inode = page->mapping ? page->mapping->host : NULL; dout("%p releasepage %p idx %lu\n", inode, page, page->index); WARN_ON(PageDirty(page)); - WARN_ON(page->private); WARN_ON(PagePrivate(page)); return 0; } @@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) osdc = &fsc->client->osdc; /* verify this is a writeable snap context */ - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc == NULL) { dout("writepage %p page %p not dirty?\n", inode, page); goto out; @@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) oldest = get_oldest_context(inode, &snap_size); if (snapc->seq > oldest->seq) { dout("writepage %p page %p snapc %p not writeable - noop\n", - inode, page, (void *)page->private); + inode, page, snapc); /* we should only noop if called by kswapd */ WARN_ON((current->flags & PF_MEMALLOC) == 0); ceph_put_snap_context(oldest); @@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req, clear_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); - ceph_put_snap_context((void *)page->private); + ceph_put_snap_context(page_snap_context(page)); page->private = 0; ClearPagePrivate(page); dout("unlocking %d %p\n", i, page); @@ -795,7 +798,7 @@ get_more_pages: } /* only if matching snap context */ - pgsnapc = (void *)page->private; + pgsnapc = page_snap_context(page); if (pgsnapc->seq > snapc->seq) { dout("page snapc %p %lld > oldest %p %lld\n", pgsnapc, pgsnapc->seq, snapc, snapc->seq); @@ -984,7 +987,7 @@ retry_locked: BUG_ON(!ci->i_snap_realm); down_read(&mdsc->snap_rwsem); BUG_ON(!ci->i_snap_realm->cached_context); - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc && snapc != ci->i_head_snapc) { /* * this page is already dirty in another (older) snap -- cgit v1.1 From 15d9882c336db2db73ccf9871ae2398e452f694c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 May 2012 23:26:43 -0500 Subject: libceph: embed ceph messenger structure in ceph_client A ceph client has a pointer to a ceph messenger structure in it. There is always exactly one ceph messenger for a ceph client, so there is no need to allocate it separate from the ceph client structure. Switch the ceph_client structure to embed its ceph_messenger structure. Signed-off-by: Alex Elder Reviewed-by: Yehuda Sadeh Reviewed-by: Sage Weil --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 200bc87..ad30261 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -394,7 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); + ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con); s->s_con.private = s; s->s_con.ops = &mds_con_ops; s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; -- cgit v1.1 From 1bfd89f4e6e1adc6a782d94aa5d4c53be1e404d7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 May 2012 23:26:43 -0500 Subject: libceph: fully initialize connection in con_init() Move the initialization of a ceph connection's private pointer, operations vector pointer, and peer name information into ceph_con_init(). Rearrange the arguments so the connection pointer is first. Hide the byte-swapping of the peer entity number inside ceph_con_init() Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/mds_client.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ad30261..ecd7f15 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -394,11 +394,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con); - s->s_con.private = s; - s->s_con.ops = &mds_con_ops; - s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; - s->s_con.peer_name.num = cpu_to_le64(mds); + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, + CEPH_ENTITY_TYPE_MDS, mds); spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; -- cgit v1.1 From b7a9e5dd40f17a48a72f249b8bbc989b63bae5fd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 27 Jun 2012 12:24:08 -0700 Subject: libceph: set peer name on con_open, not init The peer name may change on each open attempt, even when the connection is reused. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ecd7f15..5ac6434 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -394,8 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, - CEPH_ENTITY_TYPE_MDS, mds); + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; @@ -437,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, mdsc->sessions[mds] = s; atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */ - ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); + ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, + ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); return s; @@ -2529,6 +2529,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_seq = 0; ceph_con_open(&session->s_con, + CEPH_ENTITY_TYPE_MDS, mds, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); /* replay unsafe requests */ -- cgit v1.1 From 8842b3be96c376f174ae0d4f282d14728ad5febf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 7 Jun 2012 13:43:35 -0700 Subject: ceph: clean up useless d_parent checks d_parent is never NULL, and IS_ROOT() is the proper way to check for a (non-self-referential) parent. Reported-by: Al Viro Signed-off-by: Sage Weil --- fs/ceph/dir.c | 7 +++---- fs/ceph/mds_client.c | 11 ----------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3e8094b..6a66bd2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -51,8 +51,7 @@ int ceph_init_dentry(struct dentry *dentry) goto out_unlock; } - if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ - ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) + if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) d_set_d_op(dentry, &ceph_snapdir_dentry_ops); @@ -79,7 +78,7 @@ struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry) return NULL; spin_lock(&dentry->d_lock); - if (dentry->d_parent) { + if (!IS_ROOT(dentry)) { inode = dentry->d_parent->d_inode; ihold(inode); } @@ -1140,7 +1139,7 @@ static void ceph_d_prune(struct dentry *dentry) dout("ceph_d_prune %p\n", dentry); /* do we have a valid parent? */ - if (!dentry->d_parent || IS_ROOT(dentry)) + if (IS_ROOT(dentry)) return; /* if we are not hashed, we don't affect D_COMPLETE */ diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5ac6434..418f6a8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1469,11 +1469,6 @@ retry: else len += 1 + temp->d_name.len; temp = temp->d_parent; - if (temp == NULL) { - rcu_read_unlock(); - pr_err("build_path corrupt dentry %p\n", dentry); - return ERR_PTR(-EINVAL); - } } rcu_read_unlock(); if (len) @@ -1510,12 +1505,6 @@ retry: if (pos) path[--pos] = '/'; temp = temp->d_parent; - if (temp == NULL) { - rcu_read_unlock(); - pr_err("build_path corrupt dentry\n"); - kfree(path); - return ERR_PTR(-EINVAL); - } } rcu_read_unlock(); if (pos != 0 || read_seqretry(&rename_lock, seq)) { -- cgit v1.1 From 1fe60e51a3744528f3939b1b1167ca909133d9ae Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 30 Jul 2012 16:23:22 -0700 Subject: libceph: move feature bits to separate header This is simply cleanup that will keep things more closely synced with the userland code. Signed-off-by: Sage Weil Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- fs/ceph/mds_client.c | 1 + fs/ceph/super.c | 1 + 2 files changed, 2 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 418f6a8..39b76d6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -10,6 +10,7 @@ #include "super.h" #include "mds_client.h" +#include #include #include #include diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1e67dd7..2c47ecf 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -18,6 +18,7 @@ #include "super.h" #include "mds_client.h" +#include #include #include #include -- cgit v1.1 From a53aab645c82f0146e35684b34692c69b5118121 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 30 Jul 2012 16:21:17 -0700 Subject: ceph: close old con before reopening on mds reconnect When we detect a mds session reset, close the old ceph_connection before reopening it. This ensures we clean up the old socket properly and keep the ceph_connection state correct. Signed-off-by: Sage Weil Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- fs/ceph/mds_client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 39b76d6..a5a7354 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2518,6 +2518,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; + ceph_con_close(&session->s_con); ceph_con_open(&session->s_con, CEPH_ENTITY_TYPE_MDS, mds, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); -- cgit v1.1 From 21ec6ffa46719a4ed45531b5b01014c26f0416c4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 20 Jul 2012 08:18:36 -0500 Subject: ceph: fix potential double free We re-run the loop but we don't re-set the attrs pointer back to NULL. Signed-off-by: Alan Cox Reviewed-by: Alex Elder --- fs/ceph/xattr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ceph') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 785cb30..2c2ae5b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -457,6 +457,7 @@ start: for (i = 0; i < numattr; i++) kfree(xattrs[i]); kfree(xattrs); + xattrs = NULL; goto start; } err = -EIO; -- cgit v1.1 From aa711ee3402ad10ffd5b70ce0417fadc9a95cccf Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 13 Jul 2012 20:35:11 -0500 Subject: ceph: define snap counts as u32 everywhere There are two structures in which a count of snapshots are maintained: struct ceph_snap_context { ... u32 num_snaps; ... } and struct ceph_snap_realm { ... u32 num_prior_parent_snaps; /* had prior to parent_since */ ... u32 num_snaps; ... } These fields never take on negative values (e.g., to hold special meaning), and so are really inherently unsigned. Furthermore they take their value from over-the-wire or on-disk formatted 32-bit values. So change their definition to have type u32, and change some spots elsewhere in the code to account for this change. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/snap.c | 18 ++++++++++-------- fs/ceph/super.h | 4 ++-- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index e5206fc..cbb2f54 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -296,8 +296,7 @@ static int build_snap_context(struct ceph_snap_realm *realm) struct ceph_snap_realm *parent = realm->parent; struct ceph_snap_context *snapc; int err = 0; - int i; - int num = realm->num_prior_parent_snaps + realm->num_snaps; + u32 num = realm->num_prior_parent_snaps + realm->num_snaps; /* * build parent context, if it hasn't been built. @@ -321,11 +320,11 @@ static int build_snap_context(struct ceph_snap_realm *realm) realm->cached_context->seq == realm->seq && (!parent || realm->cached_context->seq >= parent->cached_context->seq)) { - dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" + dout("build_snap_context %llx %p: %p seq %lld (%u snaps)" " (unchanged)\n", realm->ino, realm, realm->cached_context, realm->cached_context->seq, - realm->cached_context->num_snaps); + (unsigned int) realm->cached_context->num_snaps); return 0; } @@ -342,6 +341,8 @@ static int build_snap_context(struct ceph_snap_realm *realm) num = 0; snapc->seq = realm->seq; if (parent) { + u32 i; + /* include any of parent's snaps occurring _after_ my parent became my parent */ for (i = 0; i < parent->cached_context->num_snaps; i++) @@ -361,8 +362,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL); snapc->num_snaps = num; - dout("build_snap_context %llx %p: %p seq %lld (%d snaps)\n", - realm->ino, realm, snapc, snapc->seq, snapc->num_snaps); + dout("build_snap_context %llx %p: %p seq %lld (%u snaps)\n", + realm->ino, realm, snapc, snapc->seq, + (unsigned int) snapc->num_snaps); if (realm->cached_context) ceph_put_snap_context(realm->cached_context); @@ -402,9 +404,9 @@ static void rebuild_snap_realms(struct ceph_snap_realm *realm) * helper to allocate and decode an array of snapids. free prior * instance, if any. */ -static int dup_array(u64 **dst, __le64 *src, int num) +static int dup_array(u64 **dst, __le64 *src, u32 num) { - int i; + u32 i; kfree(*dst); if (num) { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fc35036..3ea48b7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -612,9 +612,9 @@ struct ceph_snap_realm { u64 parent_since; /* snapid when our current parent became so */ u64 *prior_parent_snaps; /* snaps inherited from any parents we */ - int num_prior_parent_snaps; /* had prior to parent_since */ + u32 num_prior_parent_snaps; /* had prior to parent_since */ u64 *snaps; /* snaps specific to this realm */ - int num_snaps; + u32 num_snaps; struct ceph_snap_realm *parent; struct list_head children; /* list of child realms */ -- cgit v1.1