diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-17 10:55:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-17 10:55:51 -0700 |
commit | 5b664cb235e97afbf34db9c4d77f08ebd725335e (patch) | |
tree | 518540649c38342209790de8e0b575ac1a6fa722 /fs | |
parent | f39548a6ad1dbdfaab552419386ec5bb1d76fa0d (diff) | |
parent | c0420ad2ca514551ca086510b0e7d17a05c70492 (diff) | |
download | op-kernel-dev-5b664cb235e97afbf34db9c4d77f08ebd725335e.zip op-kernel-dev-5b664cb235e97afbf34db9c4d77f08ebd725335e.tar.gz |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2:
[PATCH] ocfs2: fix oops in mmap_truncate testing
configfs: call drop_link() to cleanup after create_link() failure
configfs: Allow ->make_item() and ->make_group() to return detailed errors.
configfs: Fix failing mkdir() making racing rmdir() fail
configfs: Fix deadlock with racing rmdir() and rename()
configfs: Make configfs_new_dirent() return error code instead of NULL
configfs: Protect configfs_dirent s_links list mutations
configfs: Introduce configfs_dirent_lock
ocfs2: Don't snprintf() without a format.
ocfs2: Fix CONFIG_OCFS2_DEBUG_FS #ifdefs
ocfs2/net: Silence build warnings on sparc64
ocfs2: Handle error during journal load
ocfs2: Silence an error message in ocfs2_file_aio_read()
ocfs2: use simple_read_from_buffer()
ocfs2: fix printk format warnings with OCFS2_FS_STATS=n
[PATCH 2/2] ocfs2: Instrument fs cluster locks
[PATCH 1/2] ocfs2: Add CONFIG_OCFS2_FS_STATS config option
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig | 8 | ||||
-rw-r--r-- | fs/configfs/configfs_internal.h | 4 | ||||
-rw-r--r-- | fs/configfs/dir.c | 147 | ||||
-rw-r--r-- | fs/configfs/inode.c | 2 | ||||
-rw-r--r-- | fs/configfs/symlink.c | 16 | ||||
-rw-r--r-- | fs/dlm/config.c | 45 | ||||
-rw-r--r-- | fs/ocfs2/aops.c | 13 | ||||
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 17 | ||||
-rw-r--r-- | fs/ocfs2/cluster/netdebug.c | 8 | ||||
-rw-r--r-- | fs/ocfs2/cluster/nodemanager.c | 45 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 122 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/localalloc.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 12 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/stack_user.c | 19 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 6 |
18 files changed, 361 insertions, 111 deletions
@@ -470,6 +470,14 @@ config OCFS2_FS_USERSPACE_CLUSTER It is safe to say Y, as the clustering method is run-time selectable. +config OCFS2_FS_STATS + bool "OCFS2 statistics" + depends on OCFS2_FS + default y + help + This option allows some fs statistics to be captured. Enabling + this option may increase the memory consumption. + config OCFS2_DEBUG_MASKLOG bool "OCFS2 logging support" depends on OCFS2_FS diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index cca9860..da015c1 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/list.h> +#include <linux/spinlock.h> struct configfs_dirent { atomic_t s_count; @@ -47,8 +48,11 @@ struct configfs_dirent { #define CONFIGFS_USET_DIR 0x0040 #define CONFIGFS_USET_DEFAULT 0x0080 #define CONFIGFS_USET_DROPPING 0x0100 +#define CONFIGFS_USET_IN_MKDIR 0x0200 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) +extern spinlock_t configfs_dirent_lock; + extern struct vfsmount * configfs_mount; extern struct kmem_cache *configfs_dir_cachep; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index a48dc7d..0e64312 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -30,11 +30,25 @@ #include <linux/mount.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/err.h> #include <linux/configfs.h> #include "configfs_internal.h" DECLARE_RWSEM(configfs_rename_sem); +/* + * Protects mutations of configfs_dirent linkage together with proper i_mutex + * Also protects mutations of symlinks linkage to target configfs_dirent + * Mutators of configfs_dirent linkage must *both* have the proper inode locked + * and configfs_dirent_lock locked, in that order. + * This allows one to safely traverse configfs_dirent trees and symlinks without + * having to lock inodes. + * + * Protects setting of CONFIGFS_USET_DROPPING: checking the flag + * unlocked is not reliable unless in detach_groups() called from + * rmdir()/unregister() and from configfs_attach_group() + */ +DEFINE_SPINLOCK(configfs_dirent_lock); static void configfs_d_iput(struct dentry * dentry, struct inode * inode) @@ -74,13 +88,20 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL); if (!sd) - return NULL; + return ERR_PTR(-ENOMEM); atomic_set(&sd->s_count, 1); INIT_LIST_HEAD(&sd->s_links); INIT_LIST_HEAD(&sd->s_children); - list_add(&sd->s_sibling, &parent_sd->s_children); sd->s_element = element; + spin_lock(&configfs_dirent_lock); + if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { + spin_unlock(&configfs_dirent_lock); + kmem_cache_free(configfs_dir_cachep, sd); + return ERR_PTR(-ENOENT); + } + list_add(&sd->s_sibling, &parent_sd->s_children); + spin_unlock(&configfs_dirent_lock); return sd; } @@ -118,8 +139,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd, struct configfs_dirent * sd; sd = configfs_new_dirent(parent_sd, element); - if (!sd) - return -ENOMEM; + if (IS_ERR(sd)) + return PTR_ERR(sd); sd->s_mode = mode; sd->s_type = type; @@ -173,7 +194,9 @@ static int create_dir(struct config_item * k, struct dentry * p, } else { struct configfs_dirent *sd = d->d_fsdata; if (sd) { + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } } @@ -224,7 +247,9 @@ int configfs_create_link(struct configfs_symlink *sl, else { struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } } @@ -238,7 +263,9 @@ static void remove_dir(struct dentry * d) struct configfs_dirent * sd; sd = d->d_fsdata; + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_put(sd); if (d->d_inode) simple_rmdir(parent->d_inode,d); @@ -331,13 +358,13 @@ static struct dentry * configfs_lookup(struct inode *dir, /* * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are - * attributes and are removed by rmdir(). We recurse, taking i_mutex - * on all children that are candidates for default detach. If the - * result is clean, then configfs_detach_group() will handle dropping - * i_mutex. If there is an error, the caller will clean up the i_mutex - * holders via configfs_detach_rollback(). + * attributes and are removed by rmdir(). We recurse, setting + * CONFIGFS_USET_DROPPING on all children that are candidates for + * default detach. + * If there is an error, the caller will reset the flags via + * configfs_detach_rollback(). */ -static int configfs_detach_prep(struct dentry *dentry) +static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; @@ -352,15 +379,20 @@ static int configfs_detach_prep(struct dentry *dentry) if (sd->s_type & CONFIGFS_NOT_PINNED) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { - mutex_lock(&sd->s_dentry->d_inode->i_mutex); - /* Mark that we've taken i_mutex */ + /* Abort if racing with mkdir() */ + if (sd->s_type & CONFIGFS_USET_IN_MKDIR) { + if (wait_mutex) + *wait_mutex = &sd->s_dentry->d_inode->i_mutex; + return -EAGAIN; + } + /* Mark that we're trying to drop the group */ sd->s_type |= CONFIGFS_USET_DROPPING; /* * Yup, recursive. If there's a problem, blame * deep nesting of default_groups */ - ret = configfs_detach_prep(sd->s_dentry); + ret = configfs_detach_prep(sd->s_dentry, wait_mutex); if (!ret) continue; } else @@ -374,7 +406,7 @@ out: } /* - * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is + * Walk the tree, resetting CONFIGFS_USET_DROPPING wherever it was * set. */ static void configfs_detach_rollback(struct dentry *dentry) @@ -385,11 +417,7 @@ static void configfs_detach_rollback(struct dentry *dentry) list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (sd->s_type & CONFIGFS_USET_DEFAULT) { configfs_detach_rollback(sd->s_dentry); - - if (sd->s_type & CONFIGFS_USET_DROPPING) { - sd->s_type &= ~CONFIGFS_USET_DROPPING; - mutex_unlock(&sd->s_dentry->d_inode->i_mutex); - } + sd->s_type &= ~CONFIGFS_USET_DROPPING; } } } @@ -410,7 +438,9 @@ static void detach_attrs(struct config_item * item) list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED)) continue; + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dentry); configfs_put(sd); } @@ -466,16 +496,12 @@ static void detach_groups(struct config_group *group) child = sd->s_dentry; + mutex_lock(&child->d_inode->i_mutex); + configfs_detach_group(sd->s_element); child->d_inode->i_flags |= S_DEAD; - /* - * From rmdir/unregister, a configfs_detach_prep() pass - * has taken our i_mutex for us. Drop it. - * From mkdir/register cleanup, there is no sem held. - */ - if (sd->s_type & CONFIGFS_USET_DROPPING) - mutex_unlock(&child->d_inode->i_mutex); + mutex_unlock(&child->d_inode->i_mutex); d_delete(child); dput(child); @@ -1047,25 +1073,24 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) group = NULL; item = NULL; if (type->ct_group_ops->make_group) { - group = type->ct_group_ops->make_group(to_config_group(parent_item), name); - if (group) { + ret = type->ct_group_ops->make_group(to_config_group(parent_item), name, &group); + if (!ret) { link_group(to_config_group(parent_item), group); item = &group->cg_item; } } else { - item = type->ct_group_ops->make_item(to_config_group(parent_item), name); - if (item) + ret = type->ct_group_ops->make_item(to_config_group(parent_item), name, &item); + if (!ret) link_obj(parent_item, item); } mutex_unlock(&subsys->su_mutex); kfree(name); - if (!item) { + if (ret) { /* - * If item == NULL, then link_obj() was never called. + * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ - ret = -ENOMEM; goto out_put; } @@ -1093,11 +1118,26 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) */ module_got = 1; + /* + * Make racing rmdir() fail if it did not tag parent with + * CONFIGFS_USET_DROPPING + * Note: if CONFIGFS_USET_DROPPING is already set, attach_group() will + * fail and let rmdir() terminate correctly + */ + spin_lock(&configfs_dirent_lock); + /* This will make configfs_detach_prep() fail */ + sd->s_type |= CONFIGFS_USET_IN_MKDIR; + spin_unlock(&configfs_dirent_lock); + if (group) ret = configfs_attach_group(parent_item, item, dentry); else ret = configfs_attach_item(parent_item, item, dentry); + spin_lock(&configfs_dirent_lock); + sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; + spin_unlock(&configfs_dirent_lock); + out_unlink: if (ret) { /* Tear down everything we built up */ @@ -1161,12 +1201,27 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) return -EINVAL; } - ret = configfs_detach_prep(dentry); - if (ret) { - configfs_detach_rollback(dentry); - config_item_put(parent_item); - return ret; - } + spin_lock(&configfs_dirent_lock); + do { + struct mutex *wait_mutex; + + ret = configfs_detach_prep(dentry, &wait_mutex); + if (ret) { + configfs_detach_rollback(dentry); + spin_unlock(&configfs_dirent_lock); + if (ret != -EAGAIN) { + config_item_put(parent_item); + return ret; + } + + /* Wait until the racing operation terminates */ + mutex_lock(wait_mutex); + mutex_unlock(wait_mutex); + + spin_lock(&configfs_dirent_lock); + } + } while (ret == -EAGAIN); + spin_unlock(&configfs_dirent_lock); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); @@ -1258,7 +1313,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) file->private_data = configfs_new_dirent(parent_sd, NULL); mutex_unlock(&dentry->d_inode->i_mutex); - return file->private_data ? 0 : -ENOMEM; + return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; } @@ -1268,7 +1323,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file) struct configfs_dirent * cursor = file->private_data; mutex_lock(&dentry->d_inode->i_mutex); + spin_lock(&configfs_dirent_lock); list_del_init(&cursor->s_sibling); + spin_unlock(&configfs_dirent_lock); mutex_unlock(&dentry->d_inode->i_mutex); release_configfs_dirent(cursor); @@ -1308,7 +1365,9 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir /* fallthrough */ default: if (filp->f_pos == 2) { + spin_lock(&configfs_dirent_lock); list_move(q, &parent_sd->s_children); + spin_unlock(&configfs_dirent_lock); } for (p=q->next; p!= &parent_sd->s_children; p=p->next) { struct configfs_dirent *next; @@ -1331,7 +1390,9 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir dt_type(next)) < 0) return 0; + spin_lock(&configfs_dirent_lock); list_move(q, p); + spin_unlock(&configfs_dirent_lock); p = q; filp->f_pos++; } @@ -1362,6 +1423,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) struct list_head *p; loff_t n = file->f_pos - 2; + spin_lock(&configfs_dirent_lock); list_del(&cursor->s_sibling); p = sd->s_children.next; while (n && p != &sd->s_children) { @@ -1373,6 +1435,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) p = p->next; } list_add_tail(&cursor->s_sibling, p); + spin_unlock(&configfs_dirent_lock); } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -1448,9 +1511,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); - if (configfs_detach_prep(dentry)) { + spin_lock(&configfs_dirent_lock); + if (configfs_detach_prep(dentry, NULL)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } + spin_unlock(&configfs_dirent_lock); configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; mutex_unlock(&dentry->d_inode->i_mutex); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index b9a1d81..4803ccc 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -247,7 +247,9 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name) if (!sd->s_element) continue; if (!strcmp(configfs_get_name(sd), name)) { + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dir); configfs_put(sd); break; diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 2a731ef..0004d18 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -77,12 +77,15 @@ static int create_link(struct config_item *parent_item, sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); if (sl) { sl->sl_target = config_item_get(item); - /* FIXME: needs a lock, I'd bet */ + spin_lock(&configfs_dirent_lock); list_add(&sl->sl_list, &target_sd->s_links); + spin_unlock(&configfs_dirent_lock); ret = configfs_create_link(sl, parent_item->ci_dentry, dentry); if (ret) { + spin_lock(&configfs_dirent_lock); list_del_init(&sl->sl_list); + spin_unlock(&configfs_dirent_lock); config_item_put(item); kfree(sl); } @@ -137,8 +140,12 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna goto out_put; ret = type->ct_item_ops->allow_link(parent_item, target_item); - if (!ret) + if (!ret) { ret = create_link(parent_item, target_item, dentry); + if (ret && type->ct_item_ops->drop_link) + type->ct_item_ops->drop_link(parent_item, + target_item); + } config_item_put(target_item); path_put(&nd.path); @@ -169,7 +176,9 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry) parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dentry->d_parent); dput(dentry); configfs_put(sd); @@ -184,8 +193,9 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry) type->ct_item_ops->drop_link(parent_item, sl->sl_target); - /* FIXME: Needs lock */ + spin_lock(&configfs_dirent_lock); list_del_init(&sl->sl_list); + spin_unlock(&configfs_dirent_lock); /* Put reference from create_link() */ config_item_put(sl->sl_target); diff --git a/fs/dlm/config.c b/fs/dlm/config.c index eac23bd..492d8ca 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -41,16 +41,20 @@ struct comm; struct nodes; struct node; -static struct config_group *make_cluster(struct config_group *, const char *); +static int make_cluster(struct config_group *, const char *, + struct config_group **); static void drop_cluster(struct config_group *, struct config_item *); static void release_cluster(struct config_item *); -static struct config_group *make_space(struct config_group *, const char *); +static int make_space(struct config_group *, const char *, + struct config_group **); static void drop_space(struct config_group *, struct config_item *); static void release_space(struct config_item *); -static struct config_item *make_comm(struct config_group *, const char *); +static int make_comm(struct config_group *, const char *, + struct config_item **); static void drop_comm(struct config_group *, struct config_item *); static void release_comm(struct config_item *); -static struct config_item *make_node(struct config_group *, const char *); +static int make_node(struct config_group *, const char *, + struct config_item **); static void drop_node(struct config_group *, struct config_item *); static void release_node(struct config_item *); @@ -392,8 +396,8 @@ static struct node *to_node(struct config_item *i) return i ? container_of(i, struct node, item) : NULL; } -static struct config_group *make_cluster(struct config_group *g, - const char *name) +static int make_cluster(struct config_group *g, const char *name, + struct config_group **new_g) { struct cluster *cl = NULL; struct spaces *sps = NULL; @@ -431,14 +435,15 @@ static struct config_group *make_cluster(struct config_group *g, space_list = &sps->ss_group; comm_list = &cms->cs_group; - return &cl->group; + *new_g = &cl->group; + return 0; fail: kfree(cl); kfree(gps); kfree(sps); kfree(cms); - return NULL; + return -ENOMEM; } static void drop_cluster(struct config_group *g, struct config_item *i) @@ -466,7 +471,8 @@ static void release_cluster(struct config_item *i) kfree(cl); } -static struct config_group *make_space(struct config_group *g, const char *name) +static int make_space(struct config_group *g, const char *name, + struct config_group **new_g) { struct space *sp = NULL; struct nodes *nds = NULL; @@ -489,13 +495,14 @@ static struct config_group *make_space(struct config_group *g, const char *name) INIT_LIST_HEAD(&sp->members); mutex_init(&sp->members_lock); sp->members_count = 0; - return &sp->group; + *new_g = &sp->group; + return 0; fail: kfree(sp); kfree(gps); kfree(nds); - return NULL; + return -ENOMEM; } static void drop_space(struct config_group *g, struct config_item *i) @@ -522,19 +529,21 @@ static void release_space(struct config_item *i) kfree(sp); } -static struct config_item *make_comm(struct config_group *g, const char *name) +static int make_comm(struct config_group *g, const char *name, + struct config_item **new_i) { struct comm *cm; cm = kzalloc(sizeof(struct comm), GFP_KERNEL); if (!cm) - return NULL; + return -ENOMEM; config_item_init_type_name(&cm->item, name, &comm_type); cm->nodeid = -1; cm->local = 0; cm->addr_count = 0; - return &cm->item; + *new_i = &cm->item; + return 0; } static void drop_comm(struct config_group *g, struct config_item *i) @@ -554,14 +563,15 @@ static void release_comm(struct config_item *i) kfree(cm); } -static struct config_item *make_node(struct config_group *g, const char *name) +static int make_node(struct config_group *g, const char *name, + struct config_item **new_i) { struct space *sp = to_space(g->cg_item.ci_parent); struct node *nd; nd = kzalloc(sizeof(struct node), GFP_KERNEL); if (!nd) - return NULL; + return -ENOMEM; config_item_init_type_name(&nd->item, name, &node_type); nd->nodeid = -1; @@ -573,7 +583,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) sp->members_count++; mutex_unlock(&sp->members_lock); - return &nd->item; + *new_i = &nd->item; + return 0; } static void drop_node(struct config_group *g, struct config_item *i) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 17964c0..1db0801 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -174,10 +174,17 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, * need to use BH_New is when we're extending i_size on a file * system which doesn't support holes, in which case BH_New * allows block_prepare_write() to zero. + * + * If we see this on a sparse file system, then a truncate has + * raced us and removed the cluster. In this case, we clear + * the buffers dirty and uptodate bits and let the buffer code + * ignore it as a hole. */ - mlog_bug_on_msg(create && p_blkno == 0 && ocfs2_sparse_alloc(osb), - "ino %lu, iblock %llu\n", inode->i_ino, - (unsigned long long)iblock); + if (create && p_blkno == 0 && ocfs2_sparse_alloc(osb)) { + clear_buffer_dirty(bh_result); + clear_buffer_uptodate(bh_result); + goto bail; + } /* Treat the unwritten extent as a hole for zeroing purposes. */ if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f02ccb34..443d108 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1489,25 +1489,28 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group : NULL; } -static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, - const char *name) +static int o2hb_heartbeat_group_make_item(struct config_group *group, + const char *name, + struct config_item **new_item) { struct o2hb_region *reg = NULL; - struct config_item *ret = NULL; + int ret = 0; reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); - if (reg == NULL) - goto out; /* ENOMEM */ + if (reg == NULL) { + ret = -ENOMEM; + goto out; + } config_item_init_type_name(®->hr_item, name, &o2hb_region_type); - ret = ®->hr_item; + *new_item = ®->hr_item; spin_lock(&o2hb_live_lock); list_add_tail(®->hr_all_item, &o2hb_all_regions); spin_unlock(&o2hb_live_lock); out: - if (ret == NULL) + if (ret) kfree(reg); return ret; diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 7bf3c0e..d8bfa0e 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -146,8 +146,10 @@ static int nst_seq_show(struct seq_file *seq, void *v) nst->st_task->comm, nst->st_node, nst->st_sc, nst->st_id, nst->st_msg_type, nst->st_msg_key, - nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec, - nst->st_send_time.tv_sec, nst->st_send_time.tv_usec, + nst->st_sock_time.tv_sec, + (unsigned long)nst->st_sock_time.tv_usec, + nst->st_send_time.tv_sec, + (unsigned long)nst->st_send_time.tv_usec, nst->st_status_time.tv_sec, nst->st_status_time.tv_usec); } @@ -274,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) return sc; /* unused, just needs to be null when done */ } -#define TV_SEC_USEC(TV) TV.tv_sec, TV.tv_usec +#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec static int sc_seq_show(struct seq_file *seq, void *v) { diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index cfdb08b..b364b70 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -644,27 +644,32 @@ out: return ret; } -static struct config_item *o2nm_node_group_make_item(struct config_group *group, - const char *name) +static int o2nm_node_group_make_item(struct config_group *group, + const char *name, + struct config_item **new_item) { struct o2nm_node *node = NULL; - struct config_item *ret = NULL; + int ret = 0; - if (strlen(name) > O2NM_MAX_NAME_LEN) - goto out; /* ENAMETOOLONG */ + if (strlen(name) > O2NM_MAX_NAME_LEN) { + ret = -ENAMETOOLONG; + goto out; + } node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL); - if (node == NULL) - goto out; /* ENOMEM */ + if (node == NULL) { + ret = -ENOMEM; + goto out; + } strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */ config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); spin_lock_init(&node->nd_lock); - ret = &node->nd_item; + *new_item = &node->nd_item; out: - if (ret == NULL) + if (ret) kfree(node); return ret; @@ -751,25 +756,31 @@ static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *gro } #endif -static struct config_group *o2nm_cluster_group_make_group(struct config_group *group, - const char *name) +static int o2nm_cluster_group_make_group(struct config_group *group, + const char *name, + struct config_group **new_group) { struct o2nm_cluster *cluster = NULL; struct o2nm_node_group *ns = NULL; - struct config_group *o2hb_group = NULL, *ret = NULL; + struct config_group *o2hb_group = NULL; void *defs = NULL; + int ret = 0; /* this runs under the parent dir's i_mutex; there can be only * one caller in here at a time */ - if (o2nm_single_cluster) - goto out; /* ENOSPC */ + if (o2nm_single_cluster) { + ret = -ENOSPC; + goto out; + } cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL); ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL); defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); o2hb_group = o2hb_alloc_hb_set(); - if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) + if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) { + ret = -ENOMEM; goto out; + } config_group_init_type_name(&cluster->cl_group, name, &o2nm_cluster_type); @@ -786,11 +797,11 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; - ret = &cluster->cl_group; + *new_group = &cluster->cl_group; o2nm_single_cluster = cluster; out: - if (ret == NULL) { + if (ret) { kfree(cluster); kfree(ns); o2hb_free_hb_set(o2hb_group); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 80e20d9..eae3d64 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -31,6 +31,7 @@ #include <linux/pagemap.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/time.h> #define MLOG_MASK_PREFIX ML_DLM_GLUE #include <cluster/masklog.h> @@ -59,6 +60,9 @@ struct ocfs2_mask_waiter { struct completion mw_complete; unsigned long mw_mask; unsigned long mw_goal; +#ifdef CONFIG_OCFS2_FS_STATS + unsigned long long mw_lock_start; +#endif }; static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); @@ -366,6 +370,75 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) spin_unlock(&ocfs2_dlm_tracking_lock); } +#ifdef CONFIG_OCFS2_FS_STATS +static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) +{ + res->l_lock_num_prmode = 0; + res->l_lock_num_prmode_failed = 0; + res->l_lock_total_prmode = 0; + res->l_lock_max_prmode = 0; + res->l_lock_num_exmode = 0; + res->l_lock_num_exmode_failed = 0; + res->l_lock_total_exmode = 0; + res->l_lock_max_exmode = 0; + res->l_lock_refresh = 0; +} + +static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, + struct ocfs2_mask_waiter *mw, int ret) +{ + unsigned long long *num, *sum; + unsigned int *max, *failed; + struct timespec ts = current_kernel_time(); + unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; + + if (level == LKM_PRMODE) { + num = &res->l_lock_num_prmode; + sum = &res->l_lock_total_prmode; + max = &res->l_lock_max_prmode; + failed = &res->l_lock_num_prmode_failed; + } else if (level == LKM_EXMODE) { + num = &res->l_lock_num_exmode; + sum = &res->l_lock_total_exmode; + max = &res->l_lock_max_exmode; + failed = &res->l_lock_num_exmode_failed; + } else + return; + + (*num)++; + (*sum) += time; + if (time > *max) + *max = time; + if (ret) + (*failed)++; +} + +static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) +{ + lockres->l_lock_refresh++; +} + +static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) +{ + struct timespec ts = current_kernel_time(); + mw->mw_lock_start = timespec_to_ns(&ts); +} +#else +static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) +{ +} +static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, + int level, struct ocfs2_mask_waiter *mw, int ret) +{ +} +static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) +{ +} +static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) +{ +} +#endif + static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, @@ -385,6 +458,8 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, res->l_flags = OCFS2_LOCK_INITIALIZED; ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); + + ocfs2_init_lock_stats(res); } void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) @@ -1048,6 +1123,7 @@ static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) { INIT_LIST_HEAD(&mw->mw_item); init_completion(&mw->mw_complete); + ocfs2_init_start_time(mw); } static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) @@ -1254,6 +1330,7 @@ out: goto again; mlog_errno(ret); } + ocfs2_update_lock_stats(lockres, level, &mw, ret); mlog_exit(ret); return ret; @@ -1983,6 +2060,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, le32_to_cpu(fe->i_flags)); ocfs2_refresh_inode(inode, fe); + ocfs2_track_lock_refresh(lockres); } status = 0; @@ -2267,6 +2345,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, if (status < 0) mlog_errno(status); + ocfs2_track_lock_refresh(lockres); } bail: mlog_exit(status); @@ -2461,7 +2540,7 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) } /* So that debugfs.ocfs2 can determine which format is being used */ -#define OCFS2_DLM_DEBUG_STR_VERSION 1 +#define OCFS2_DLM_DEBUG_STR_VERSION 2 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) { int i; @@ -2502,6 +2581,47 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) for(i = 0; i < DLM_LVB_LEN; i++) seq_printf(m, "0x%x\t", lvb[i]); +#ifdef CONFIG_OCFS2_FS_STATS +# define lock_num_prmode(_l) (_l)->l_lock_num_prmode +# define lock_num_exmode(_l) (_l)->l_lock_num_exmode +# define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed +# define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed +# define lock_total_prmode(_l) (_l)->l_lock_total_prmode +# define lock_total_exmode(_l) (_l)->l_lock_total_exmode +# define lock_max_prmode(_l) (_l)->l_lock_max_prmode +# define lock_max_exmode(_l) (_l)->l_lock_max_exmode +# define lock_refresh(_l) (_l)->l_lock_refresh +#else +# define lock_num_prmode(_l) (0ULL) +# define lock_num_exmode(_l) (0ULL) +# define lock_num_prmode_failed(_l) (0) +# define lock_num_exmode_failed(_l) (0) +# define lock_total_prmode(_l) (0ULL) +# define lock_total_exmode(_l) (0ULL) +# define lock_max_prmode(_l) (0) +# define lock_max_exmode(_l) (0) +# define lock_refresh(_l) (0) +#endif + /* The following seq_print was added in version 2 of this output */ + seq_printf(m, "%llu\t" + "%llu\t" + "%u\t" + "%u\t" + "%llu\t" + "%llu\t" + "%u\t" + "%u\t" + "%u\t", + lock_num_prmode(lockres), + lock_num_exmode(lockres), + lock_num_prmode_failed(lockres), + lock_num_exmode_failed(lockres), + lock_total_prmode(lockres), + lock_total_exmode(lockres), + lock_max_prmode(lockres), + lock_max_exmode(lockres), + lock_refresh(lockres)); + /* End the line */ seq_printf(m, "\n"); return 0; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 57e0d30..e8514e8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2202,7 +2202,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) - mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); + mlog(0, "generic_file_aio_read returned -EINVAL\n"); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9698338..a8c19cb 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -329,7 +329,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); -#ifdef OCFS2_DEBUG_FS +#ifdef CONFIG_OCFS2_DEBUG_FS status = 1; #else status = journal_extend(handle, nblocks); diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index be774bd..28e492e 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -498,7 +498,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; -#ifdef OCFS2_DEBUG_FS +#ifdef CONFIG_OCFS2_DEBUG_FS if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { ocfs2_error(osb->sb, "local alloc inode %llu says it has " diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 3169237..1cb814b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -132,6 +132,18 @@ struct ocfs2_lock_res { wait_queue_head_t l_event; struct list_head l_debug_list; + +#ifdef CONFIG_OCFS2_FS_STATS + unsigned long long l_lock_num_prmode; /* PR acquires */ + unsigned long long l_lock_num_exmode; /* EX acquires */ + unsigned int l_lock_num_prmode_failed; /* Failed PR gets */ + unsigned int l_lock_num_exmode_failed; /* Failed EX gets */ + unsigned long long l_lock_total_prmode; /* Tot wait for PR */ + unsigned long long l_lock_total_exmode; /* Tot wait for EX */ + unsigned int l_lock_max_prmode; /* Max wait for PR */ + unsigned int l_lock_max_exmode; /* Max wait for EX */ + unsigned int l_lock_refresh; /* Disk refreshes */ +#endif }; struct ocfs2_dlm_debug { diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 52c4266..3f19451 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -901,7 +901,7 @@ static inline int ocfs2_sprintf_system_inode_name(char *buf, int len, * list has a copy per slot. */ if (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE) - chars = snprintf(buf, len, + chars = snprintf(buf, len, "%s", ocfs2_system_inodes[type].si_name); else chars = snprintf(buf, len, diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index bd7e0f3..353fc35 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -550,26 +550,17 @@ static ssize_t ocfs2_control_read(struct file *file, size_t count, loff_t *ppos) { - char *proto_string = OCFS2_CONTROL_PROTO; - size_t to_write = 0; - - if (*ppos >= OCFS2_CONTROL_PROTO_LEN) - return 0; - - to_write = OCFS2_CONTROL_PROTO_LEN - *ppos; - if (to_write > count) - to_write = count; - if (copy_to_user(buf, proto_string + *ppos, to_write)) - return -EFAULT; + ssize_t ret; - *ppos += to_write; + ret = simple_read_from_buffer(buf, count, ppos, + OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN); /* Have we read the whole protocol list? */ - if (*ppos >= OCFS2_CONTROL_PROTO_LEN) + if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN) ocfs2_control_set_handshake_state(file, OCFS2_CONTROL_HANDSHAKE_READ); - return to_write; + return ret; } static int ocfs2_control_release(struct inode *inode, struct file *file) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index df63ba2..ccecfe5 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1703,7 +1703,11 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) local = ocfs2_mount_local(osb); /* will play back anything left in the journal. */ - ocfs2_journal_load(osb->journal, local); + status = ocfs2_journal_load(osb->journal, local); + if (status < 0) { + mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); + goto finally; + } if (dirty) { /* recover my local alloc if we didn't unmount cleanly. */ |