diff options
author | Steve French <sfrench@us.ibm.com> | 2006-02-18 15:07:18 +0000 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2006-02-18 15:07:18 +0000 |
commit | e601ef22bc5ec9332c8d785533895ee81c834b8a (patch) | |
tree | 4685a666fe2a553dccdfe7b4b3f88816d075b29e /fs | |
parent | 27754b34600770beb38e3ae12cb3f345f02e3797 (diff) | |
parent | bd71c2b17468a2531fb4c81ec1d73520845e97e1 (diff) | |
download | op-kernel-dev-e601ef22bc5ec9332c8d785533895ee81c834b8a.zip op-kernel-dev-e601ef22bc5ec9332c8d785533895ee81c834b8a.tar.gz |
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/compat.c | 6 | ||||
-rw-r--r-- | fs/ext2/xattr.c | 6 | ||||
-rw-r--r-- | fs/fuse/dev.c | 6 | ||||
-rw-r--r-- | fs/fuse/file.c | 11 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 4 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmconvert.c | 12 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmlock.c | 25 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 42 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 2 | ||||
-rw-r--r-- | fs/select.c | 6 |
12 files changed, 112 insertions, 22 deletions
diff --git a/fs/compat.c b/fs/compat.c index a2ba78b..5333c7d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1757,7 +1757,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, goto sticky; rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); rtv.tv_sec = timeout; - if (compat_timeval_compare(&rtv, &tv) < 0) + if (compat_timeval_compare(&rtv, &tv) >= 0) rtv = tv; if (copy_to_user(tvp, &rtv, sizeof(rtv))) { sticky: @@ -1834,7 +1834,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, rts.tv_sec++; rts.tv_nsec -= NSEC_PER_SEC; } - if (compat_timespec_compare(&rts, &ts) < 0) + if (compat_timespec_compare(&rts, &ts) >= 0) rts = ts; copy_to_user(tsp, &rts, sizeof(rts)); } @@ -1934,7 +1934,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; rts.tv_sec = timeout; - if (compat_timespec_compare(&rts, &ts) < 0) + if (compat_timespec_compare(&rts, &ts) >= 0) rts = ts; if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index a2ca310..86ae8e9 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -792,18 +792,20 @@ ext2_xattr_delete_inode(struct inode *inode) ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); get_bh(bh); bforget(bh); + unlock_buffer(bh); } else { HDR(bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(bh)->h_refcount) - 1); if (ce) mb_cache_entry_release(ce); + ea_bdebug(bh, "refcount now=%d", + le32_to_cpu(HDR(bh)->h_refcount)); + unlock_buffer(bh); mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); DQUOT_FREE_BLOCK(inode, 1); } - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); - unlock_buffer(bh); EXT2_I(inode)->i_file_acl = 0; cleanup: diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f556a0d..0c9a2ee 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -66,6 +66,12 @@ static void restore_sigs(sigset_t *oldset) sigprocmask(SIG_SETMASK, oldset, NULL); } +/* + * Reset request, so that it can be reused + * + * The caller must be _very_ careful to make sure, that it is holding + * the only reference to req + */ void fuse_reset_request(struct fuse_req *req) { int preallocated = req->preallocated; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2963516..6f05379 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -116,9 +116,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) /* Special case for failed iget in CREATE */ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - u64 nodeid = req->in.h.nodeid; - fuse_reset_request(req); - fuse_send_forget(fc, req, nodeid, 1); + /* If called from end_io_requests(), req has more than one + reference and fuse_reset_request() cannot work */ + if (fc->connected) { + u64 nodeid = req->in.h.nodeid; + fuse_reset_request(req); + fuse_send_forget(fc, req, nodeid, 1); + } else + fuse_put_request(fc, req); } void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 42eb53b..23ceaa7 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -208,6 +208,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm, #define DLM_LOCK_RES_IN_PROGRESS 0x00000010 #define DLM_LOCK_RES_MIGRATING 0x00000020 +/* max milliseconds to wait to sync up a network failure with a node death */ +#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) + #define DLM_PURGE_INTERVAL_MS (8 * 1000) struct dlm_lock_resource @@ -658,6 +661,7 @@ int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); void dlm_wait_for_recovery(struct dlm_ctxt *dlm); int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); +int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); void dlm_put(struct dlm_ctxt *dlm); struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 6001b22..f66e2d81 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, } else { mlog_errno(tmpret); if (dlm_is_host_down(tmpret)) { + /* instead of logging the same network error over + * and over, sleep here and wait for the heartbeat + * to notice the node is dead. times out after 5s. */ + dlm_wait_for_node_death(dlm, res->owner, + DLM_NODE_DEATH_WAIT_MAX); ret = DLM_RECOVERING; mlog(0, "node %u died so returning DLM_RECOVERING " "from convert message!\n", res->owner); @@ -421,7 +426,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) struct dlm_lockstatus *lksb; enum dlm_status status = DLM_NORMAL; u32 flags; - int call_ast = 0, kick_thread = 0; + int call_ast = 0, kick_thread = 0, ast_reserved = 0; if (!dlm_grab(dlm)) { dlm_error(DLM_REJECTED); @@ -490,6 +495,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) status = __dlm_lockres_state_to_status(res); if (status == DLM_NORMAL) { __dlm_lockres_reserve_ast(res); + ast_reserved = 1; res->state |= DLM_LOCK_RES_IN_PROGRESS; status = __dlmconvert_master(dlm, res, lock, flags, cnv->requested_type, @@ -512,10 +518,10 @@ leave: else dlm_lock_put(lock); - /* either queue the ast or release it */ + /* either queue the ast or release it, if reserved */ if (call_ast) dlm_queue_ast(dlm, lock); - else + else if (ast_reserved) dlm_lockres_release_ast(dlm, res); if (kick_thread) diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index d1a0038..671d4ff 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -220,6 +220,17 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, dlm_error(status); dlm_revert_pending_lock(res, lock); dlm_lock_put(lock); + } else if (dlm_is_recovery_lock(res->lockname.name, + res->lockname.len)) { + /* special case for the $RECOVERY lock. + * there will never be an AST delivered to put + * this lock on the proper secondary queue + * (granted), so do it manually. */ + mlog(0, "%s: $RECOVERY lock for this node (%u) is " + "mastered by %u; got lock, manually granting (no ast)\n", + dlm->name, dlm->node_num, res->owner); + list_del_init(&lock->list); + list_add_tail(&lock->list, &res->granted); } spin_unlock(&res->spinlock); @@ -646,7 +657,19 @@ retry_lock: mlog(0, "retrying lock with migration/" "recovery/in progress\n"); msleep(100); - dlm_wait_for_recovery(dlm); + /* no waiting for dlm_reco_thread */ + if (recovery) { + if (status == DLM_RECOVERING) { + mlog(0, "%s: got RECOVERING " + "for $REOCVERY lock, master " + "was %u\n", dlm->name, + res->owner); + dlm_wait_for_node_death(dlm, res->owner, + DLM_NODE_DEATH_WAIT_MAX); + } + } else { + dlm_wait_for_recovery(dlm); + } goto retry_lock; } diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a3194fe..2e2e95e 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2482,7 +2482,9 @@ top: atomic_set(&mle->woken, 1); spin_unlock(&mle->spinlock); wake_up(&mle->wq); - /* final put will take care of list removal */ + /* do not need events any longer, so detach + * from heartbeat */ + __dlm_mle_detach_hb_events(dlm, mle); __dlm_put_mle(mle); } continue; @@ -2537,6 +2539,9 @@ top: spin_unlock(&res->spinlock); dlm_lockres_put(res); + /* about to get rid of mle, detach from heartbeat */ + __dlm_mle_detach_hb_events(dlm, mle); + /* dump the mle */ spin_lock(&dlm->master_lock); __dlm_put_mle(mle); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 186e9a7..ed76bda 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -278,6 +278,24 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node) return dead; } +int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) +{ + if (timeout) { + mlog(ML_NOTICE, "%s: waiting %dms for notification of " + "death of node %u\n", dlm->name, timeout, node); + wait_event_timeout(dlm->dlm_reco_thread_wq, + dlm_is_node_dead(dlm, node), + msecs_to_jiffies(timeout)); + } else { + mlog(ML_NOTICE, "%s: waiting indefinitely for notification " + "of death of node %u\n", dlm->name, node); + wait_event(dlm->dlm_reco_thread_wq, + dlm_is_node_dead(dlm, node)); + } + /* for now, return 0 */ + return 0; +} + /* callers of the top-level api calls (dlmlock/dlmunlock) should * block on the dlm->reco.event when recovery is in progress. * the dlm recovery thread will set this state when it begins @@ -2032,6 +2050,30 @@ again: dlm->reco.new_master); status = -EEXIST; } else { + status = 0; + + /* see if recovery was already finished elsewhere */ + spin_lock(&dlm->spinlock); + if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { + status = -EINVAL; + mlog(0, "%s: got reco EX lock, but " + "node got recovered already\n", dlm->name); + if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { + mlog(ML_ERROR, "%s: new master is %u " + "but no dead node!\n", + dlm->name, dlm->reco.new_master); + BUG(); + } + } + spin_unlock(&dlm->spinlock); + } + + /* if this node has actually become the recovery master, + * set the master and send the messages to begin recovery */ + if (!status) { + mlog(0, "%s: dead=%u, this=%u, sending " + "begin_reco now\n", dlm->name, + dlm->reco.dead_node, dlm->node_num); status = dlm_send_begin_reco_message(dlm, dlm->reco.dead_node); /* this always succeeds */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index fa0bcac..d329c9d 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1584,10 +1584,9 @@ static int ocfs2_commit_thread(void *arg) while (!(kthread_should_stop() && atomic_read(&journal->j_num_trans) == 0)) { - wait_event_interruptible_timeout(osb->checkpoint_event, - atomic_read(&journal->j_num_trans) - || kthread_should_stop(), - OCFS2_CHECKPOINT_INTERVAL); + wait_event_interruptible(osb->checkpoint_event, + atomic_read(&journal->j_num_trans) + || kthread_should_stop()); status = ocfs2_commit_cache(osb); if (status < 0) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 7d0a816..2f3a6ac 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -29,8 +29,6 @@ #include <linux/fs.h> #include <linux/jbd.h> -#define OCFS2_CHECKPOINT_INTERVAL (8 * HZ) - enum ocfs2_journal_state { OCFS2_JOURNAL_FREE = 0, OCFS2_JOURNAL_LOADED, diff --git a/fs/select.c b/fs/select.c index 6ce68a9..1815a57 100644 --- a/fs/select.c +++ b/fs/select.c @@ -404,7 +404,7 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, goto sticky; rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); rtv.tv_sec = timeout; - if (timeval_compare(&rtv, &tv) < 0) + if (timeval_compare(&rtv, &tv) >= 0) rtv = tv; if (copy_to_user(tvp, &rtv, sizeof(rtv))) { sticky: @@ -471,7 +471,7 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) < 0) + if (timespec_compare(&rts, &ts) >= 0) rts = ts; if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: @@ -775,7 +775,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) < 0) + if (timespec_compare(&rts, &ts) >= 0) rts = ts; if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: |