summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author jeff <jeff@FreeBSD.org> 2010-05-19 06:18:01 +0000
committer jeff <jeff@FreeBSD.org> 2010-05-19 06:18:01 +0000
commit ebb7d74daeb7d8d95e5f195222ca706cfe992819 (patch)
tree b0b17d01f5e38a6e802e8109fbc913803f5cc3bc
parent e720d3461e7a2b4706734e4c74de4dfdc2fd8054 (diff)
download FreeBSD-src-ebb7d74daeb7d8d95e5f195222ca706cfe992819.zip
FreeBSD-src-ebb7d74daeb7d8d95e5f195222ca706cfe992819.tar.gz
- Don't immediately re-run softdepflush if we didn't make any progress
  on the last iteration. This can lead to a deadlock when we have
  worklist items that cannot be immediately satisfied.
  Reported by: uqs, Dimitry Andric <dimitry@andric.com>
- Remove some unnecessary debugging code and place some other under
  SUJ_DEBUG.
- Examine the journal state in softdep_slowdown().
- Re-format some comments so I may more easily add flag descriptions.
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c 41
-rw-r--r-- sys/ufs/ffs/softdep.h 82
2 files changed, 72 insertions, 51 deletions
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 49510a7..3a1edee 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
#ifndef DEBUG
#define DEBUG
#endif
-#define SUJ_DEBUG
#include <sys/param.h>
#include <sys/kernel.h>
@@ -1200,6 +1199,7 @@ softdep_flush(void)
struct ufsmount *ump;
struct thread *td;
int remaining;
+ int progress;
int vfslocked;
td = curthread;
@@ -1224,7 +1224,7 @@ softdep_flush(void)
}
FREE_LOCK(&lk);
VFS_UNLOCK_GIANT(vfslocked);
- remaining = 0;
+ remaining = progress = 0;
mtx_lock(&mountlist_mtx);
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1233,7 +1233,7 @@ softdep_flush(void)
if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
continue;
vfslocked = VFS_LOCK_GIANT(mp);
- softdep_process_worklist(mp, 0);
+ progress += softdep_process_worklist(mp, 0);
ump = VFSTOUFS(mp);
remaining += ump->softdep_on_worklist -
ump->softdep_on_worklist_inprogress;
@@ -1243,7 +1243,7 @@ softdep_flush(void)
vfs_unbusy(mp);
}
mtx_unlock(&mountlist_mtx);
- if (remaining)
+ if (remaining && progress)
continue;
ACQUIRE_LOCK(&lk);
if (!req_pending)
@@ -1449,7 +1449,7 @@ process_worklist_item(mp, flags)
struct mount *mp;
int flags;
{
- struct worklist *wk, *wkXXX;
+ struct worklist *wk;
struct ufsmount *ump;
struct vnode *vp;
int matchcnt = 0;
@@ -1472,11 +1472,8 @@ process_worklist_item(mp, flags)
vp = NULL;
ump = VFSTOUFS(mp);
LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) {
- if (wk->wk_state & INPROGRESS) {
- wkXXX = wk;
+ if (wk->wk_state & INPROGRESS)
continue;
- }
- wkXXX = wk; /* Record the last valid wk pointer. */
if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
break;
wk->wk_state |= INPROGRESS;
@@ -2364,7 +2361,7 @@ remove_from_journal(wk)
mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
-#ifdef DEBUG /* XXX Expensive, temporary. */
+#ifdef SUJ_DEBUG
{
struct worklist *wkn;
@@ -2401,16 +2398,15 @@ journal_space(ump, thresh)
struct jblocks *jblocks;
int avail;
+ jblocks = ump->softdep_jblocks;
+ if (jblocks == NULL)
+ return (1);
/*
* We use a tighter restriction here to prevent request_cleanup()
* running in threads from running into locks we currently hold.
*/
if (num_inodedep > (max_softdeps / 10) * 9)
return (0);
-
- jblocks = ump->softdep_jblocks;
- if (jblocks == NULL)
- return (1);
if (thresh)
thresh = jblocks->jb_min;
else
@@ -2727,7 +2723,7 @@ softdep_process_journal(mp, flags)
break;
printf("softdep: Out of journal space!\n");
softdep_speedup();
- msleep(jblocks, &lk, PRIBIO, "jblocks", 1);
+ msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
}
FREE_LOCK(&lk);
jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
@@ -10870,18 +10866,29 @@ int
softdep_slowdown(vp)
struct vnode *vp;
{
+ struct ufsmount *ump;
+ int jlow;
int max_softdeps_hard;
ACQUIRE_LOCK(&lk);
+ jlow = 0;
+ /*
+ * Check for journal space if needed.
+ */
+ if (DOINGSUJ(vp)) {
+ ump = VFSTOUFS(vp->v_mount);
+ if (journal_space(ump, 0) == 0)
+ jlow = 1;
+ }
max_softdeps_hard = max_softdeps * 11 / 10;
if (num_dirrem < max_softdeps_hard / 2 &&
num_inodedep < max_softdeps_hard &&
VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
- num_freeblkdep < max_softdeps_hard) {
+ num_freeblkdep < max_softdeps_hard && jlow == 0) {
FREE_LOCK(&lk);
return (0);
}
- if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps)
+ if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps || jlow)
softdep_speedup();
stat_sync_limit_hit += 1;
FREE_LOCK(&lk);
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 5d8a869..e61b81b 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -46,51 +46,65 @@
* copy of the data. A particular data dependency is eliminated when
* it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE.
*
- * ATTACHED means that the data is not currently being written to
- * disk. UNDONE means that the data has been rolled back to a safe
+ * The ATTACHED flag means that the data is not currently being written
+ * to disk.
+ *
+ * The UNDONE flag means that the data has been rolled back to a safe
* state for writing to the disk. When the I/O completes, the data is
* restored to its current form and the state reverts to ATTACHED.
* The data must be locked throughout the rollback, I/O, and roll
* forward so that the rolled back information is never visible to
- * user processes. The COMPLETE flag indicates that the item has been
- * written. For example, a dependency that requires that an inode be
- * written will be marked COMPLETE after the inode has been written
- * to disk. The DEPCOMPLETE flag indicates the completion of any other
+ * user processes.
+ *
+ * The COMPLETE flag indicates that the item has been written. For example,
+ * a dependency that requires that an inode be written will be marked
+ * COMPLETE after the inode has been written to disk.
+ *
+ * The DEPCOMPLETE flag indicates the completion of any other
* dependencies such as the writing of a cylinder group map has been
* completed. A dependency structure may be freed only when both it
* and its dependencies have completed and any rollbacks that are in
* progress have finished as indicated by the set of ALLCOMPLETE flags
- * all being set. The two MKDIR flags indicate additional dependencies
- * that must be done when creating a new directory. MKDIR_BODY is
- * cleared when the directory data block containing the "." and ".."
- * entries has been written. MKDIR_PARENT is cleared when the parent
- * inode with the increased link count for ".." has been written. When
- * both MKDIR flags have been cleared, the DEPCOMPLETE flag is set to
- * indicate that the directory dependencies have been completed. The
- * writing of the directory inode itself sets the COMPLETE flag which
- * then allows the directory entry for the new directory to be written
- * to disk. The RMDIR flag marks a dirrem structure as representing
- * the removal of a directory rather than a file. When the removal
- * dependencies are completed, additional work needs to be done
- * (truncation of the "." and ".." entries, an additional decrement
- * of the associated inode, and a decrement of the parent inode). The
- * DIRCHG flag marks a diradd structure as representing the changing
+ * all being set.
+ *
+ * The two MKDIR flags indicate additional dependencies that must be done
+ * when creating a new directory. MKDIR_BODY is cleared when the directory
+ * data block containing the "." and ".." entries has been written.
+ * MKDIR_PARENT is cleared when the parent inode with the increased link
+ * count for ".." has been written. When both MKDIR flags have been
+ * cleared, the DEPCOMPLETE flag is set to indicate that the directory
+ * dependencies have been completed. The writing of the directory inode
+ * itself sets the COMPLETE flag which then allows the directory entry for
+ * the new directory to be written to disk. The RMDIR flag marks a dirrem
+ * structure as representing the removal of a directory rather than a
+ * file. When the removal dependencies are completed, additional work needs
+ * to be done (an additional decrement of the associated inode, and a
+ * decrement of the parent inode).
+ *
+ * The DIRCHG flag marks a diradd structure as representing the changing
* of an existing entry rather than the addition of a new one. When
* the update is complete the dirrem associated with the inode for
* the old name must be added to the worklist to do the necessary
- * reference count decrement. The GOINGAWAY flag indicates that the
- * data structure is frozen from further change until its dependencies
- * have been completed and its resources freed after which it will be
- * discarded. The IOSTARTED flag prevents multiple calls to the I/O
- * start routine from doing multiple rollbacks. The SPACECOUNTED flag
- * says that the files space has been accounted to the pending free
- * space count. The NEWBLOCK flag marks pagedep structures that have
- * just been allocated, so must be claimed by the inode before all
- * dependencies are complete. The INPROGRESS flag marks worklist
- * structures that are still on the worklist, but are being considered
- * for action by some process. The UFS1FMT flag indicates that the
- * inode being processed is a ufs1 format. The EXTDATA flag indicates
- * that the allocdirect describes an extended-attributes dependency.
+ * reference count decrement.
+ *
+ * The GOINGAWAY flag indicates that the data structure is frozen from
+ * further change until its dependencies have been completed and its
+ * resources freed after which it will be discarded.
+ *
+ * The IOSTARTED flag prevents multiple calls to the I/O start routine from
+ * doing multiple rollbacks.
+ *
+ * The NEWBLOCK flag marks pagedep structures that have just been allocated,
+ * so must be claimed by the inode before all dependencies are complete.
+ *
+ * The INPROGRESS flag marks worklist structures that are still on the
+ * worklist, but are being considered for action by some process.
+ *
+ * The UFS1FMT flag indicates that the inode being processed is in ufs1 format.
+ *
+ * The EXTDATA flag indicates that the allocdirect describes an
+ * extended-attributes dependency.
+ *
* The ONWORKLIST flag shows whether the structure is currently linked
* onto a worklist.
*/
OpenPOWER on IntegriCloud