From a862e0fdcb8862aab2538ec2fc2f0dc07a625c59 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 11 Jan 2010 11:47:41 +0000
Subject: xfs: Don't directly reference m_perag in allocation code

Start abstracting the perag references so that the indexing of the
structures is not directly coded into all the places that use the
perag structures. This will allow us to separate the use of the perag
structure and the way it is indexed, and hence avoid the known
deadlocks related to growing a busy filesystem.

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Signed-off-by: Alex Elder
---
 fs/xfs/xfs_alloc.c | 82 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 46 insertions(+), 36 deletions(-)

(limited to 'fs/xfs/xfs_alloc.c')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 275b1f4..84070f2 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1662,11 +1662,13 @@ xfs_free_ag_extent(
 	xfs_agf_t	*agf;
 	xfs_perag_t	*pag;		/* per allocation group data */
 
+	pag = xfs_perag_get(mp, agno);
+	pag->pagf_freeblks += len;
+	xfs_perag_put(pag);
+
 	agf = XFS_BUF_TO_AGF(agbp);
-	pag = &mp->m_perag[agno];
 	be32_add_cpu(&agf->agf_freeblks, len);
 	xfs_trans_agblocks_delta(tp, len);
-	pag->pagf_freeblks += len;
 	XFS_WANT_CORRUPTED_GOTO(
 		be32_to_cpu(agf->agf_freeblks) <=
 		be32_to_cpu(agf->agf_length),
@@ -1969,10 +1971,12 @@ xfs_alloc_get_freelist(
 	xfs_trans_brelse(tp, agflbp);
 	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
 		agf->agf_flfirst = 0;
-	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
 	be32_add_cpu(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
+	xfs_perag_put(pag);
 
 	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
 	if (btreeblk) {
@@ -2078,7 +2082,8 @@ xfs_alloc_put_freelist(
 	be32_add_cpu(&agf->agf_fllast, 1);
 	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
 		agf->agf_fllast = 0;
-	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
 	be32_add_cpu(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
@@ -2089,6 +2094,7 @@
 		pag->pagf_btreeblks--;
 		logflags |= XFS_AGF_BTREEBLKS;
 	}
+	xfs_perag_put(pag);
 
 	xfs_alloc_log_agf(tp, agbp, logflags);
@@ -2152,7 +2158,6 @@ xfs_read_agf(
 		xfs_trans_brelse(tp, *bpp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
-
 	XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
 	return 0;
 }
@@ -2184,7 +2189,7 @@ xfs_alloc_read_agf(
 	ASSERT(!XFS_BUF_GETERROR(*bpp));
 
 	agf = XFS_BUF_TO_AGF(*bpp);
-	pag = &mp->m_perag[agno];
+	pag = xfs_perag_get(mp, agno);
 	if (!pag->pagf_init) {
 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
 		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
@@ -2211,6 +2216,7 @@
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
 	}
 #endif
+	xfs_perag_put(pag);
 	return 0;
 }
@@ -2271,7 +2277,7 @@ xfs_alloc_vextent(
 		 */
 		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
 		down_read(&mp->m_peraglock);
-		args->pag = &mp->m_perag[args->agno];
+		args->pag = xfs_perag_get(mp, args->agno);
 		args->minleft = 0;
 		error = xfs_alloc_fix_freelist(args, 0);
 		args->minleft = minleft;
@@ -2341,7 +2347,7 @@ xfs_alloc_vextent(
 		 */
 		down_read(&mp->m_peraglock);
 		for (;;) {
-			args->pag = &mp->m_perag[args->agno];
+			args->pag = xfs_perag_get(mp, args->agno);
 			if (no_min) args->minleft = 0;
 			error = xfs_alloc_fix_freelist(args, flags);
 			args->minleft = minleft;
@@ -2400,6 +2406,7 @@ xfs_alloc_vextent(
 					}
 				}
 			}
+			xfs_perag_put(args->pag);
 		}
 		up_read(&mp->m_peraglock);
 		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
@@ -2427,8 +2434,10 @@ xfs_alloc_vextent(
 			args->len);
 #endif
 	}
+	xfs_perag_put(args->pag);
 	return 0;
 error0:
+	xfs_perag_put(args->pag);
 	up_read(&mp->m_peraglock);
 	return error;
 }
@@ -2455,7 +2464,7 @@ xfs_free_extent(
 	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
 	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
 	down_read(&args.mp->m_peraglock);
-	args.pag = &args.mp->m_perag[args.agno];
+	args.pag = xfs_perag_get(args.mp, args.agno);
 	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
 		goto error0;
 #ifdef DEBUG
@@ -2465,6 +2474,7 @@ xfs_free_extent(
 #endif
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 error0:
+	xfs_perag_put(args.pag);
 	up_read(&args.mp->m_peraglock);
 	return error;
 }
@@ -2486,15 +2496,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 		    xfs_agblock_t bno,
 		    xfs_extlen_t len)
 {
-	xfs_mount_t		*mp;
 	xfs_perag_busy_t	*bsy;
+	struct xfs_perag	*pag;
 	int			n;
 
-	mp = tp->t_mountp;
-	spin_lock(&mp->m_perag[agno].pagb_lock);
+	pag = xfs_perag_get(tp->t_mountp, agno);
+	spin_lock(&pag->pagb_lock);
 
 	/* search pagb_list for an open slot */
-	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
+	for (bsy = pag->pagb_list, n = 0;
 	     n < XFS_PAGB_NUM_SLOTS;
 	     bsy++, n++) {
 		if (bsy->busy_tp == NULL) {
@@ -2502,11 +2512,11 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 		}
 	}
 
-	trace_xfs_alloc_busy(mp, agno, bno, len, n);
+	trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n);
 
 	if (n < XFS_PAGB_NUM_SLOTS) {
-		bsy = &mp->m_perag[agno].pagb_list[n];
-		mp->m_perag[agno].pagb_count++;
+		bsy = &pag->pagb_list[n];
+		pag->pagb_count++;
 		bsy->busy_start = bno;
 		bsy->busy_length = len;
 		bsy->busy_tp = tp;
@@ -2521,7 +2531,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 		xfs_trans_set_sync(tp);
 	}
 
-	spin_unlock(&mp->m_perag[agno].pagb_lock);
+	spin_unlock(&pag->pagb_lock);
+	xfs_perag_put(pag);
 }
 
 void
@@ -2529,24 +2540,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
 		     xfs_agnumber_t agno,
 		     int idx)
 {
-	xfs_mount_t		*mp;
+	struct xfs_perag	*pag;
 	xfs_perag_busy_t	*list;
 
-	mp = tp->t_mountp;
-
-	spin_lock(&mp->m_perag[agno].pagb_lock);
-	list = mp->m_perag[agno].pagb_list;
-
 	ASSERT(idx < XFS_PAGB_NUM_SLOTS);
+	pag = xfs_perag_get(tp->t_mountp, agno);
+	spin_lock(&pag->pagb_lock);
+	list = pag->pagb_list;
 
-	trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp);
+	trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp);
 
 	if (list[idx].busy_tp == tp) {
 		list[idx].busy_tp = NULL;
-		mp->m_perag[agno].pagb_count--;
+		pag->pagb_count--;
 	}
 
-	spin_unlock(&mp->m_perag[agno].pagb_lock);
+	spin_unlock(&pag->pagb_lock);
+	xfs_perag_put(pag);
 }
@@ -2560,17 +2570,15 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 		    xfs_agblock_t bno,
 		    xfs_extlen_t len)
 {
-	xfs_mount_t		*mp;
+	struct xfs_perag	*pag;
 	xfs_perag_busy_t	*bsy;
 	xfs_agblock_t		uend, bend;
 	xfs_lsn_t		lsn = 0;
 	int			cnt;
 
-	mp = tp->t_mountp;
-
-	spin_lock(&mp->m_perag[agno].pagb_lock);
-
-	uend = bno + len - 1;
+	pag = xfs_perag_get(tp->t_mountp, agno);
+	spin_lock(&pag->pagb_lock);
+	cnt = pag->pagb_count;
 
 	/*
 	 * search pagb_list for this slot, skipping open slots. We have to
@@ -2578,8 +2586,9 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	 * we have to get the most recent LSN for the log force to push out
 	 * all the transactions that span the range.
 	 */
-	for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
-		bsy = &mp->m_perag[agno].pagb_list[cnt];
+	uend = bno + len - 1;
+	for (cnt = 0; cnt < pag->pagb_count; cnt++) {
+		bsy = &pag->pagb_list[cnt];
 		if (!bsy->busy_tp)
 			continue;
@@ -2591,7 +2600,8 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 		if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
 			lsn = bsy->busy_tp->t_commit_lsn;
 	}
-	spin_unlock(&mp->m_perag[agno].pagb_lock);
+	spin_unlock(&pag->pagb_lock);
+	xfs_perag_put(pag);
 
 	trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
 
 	/*
@@ -2599,5 +2609,5 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	 * transaction that freed the block
 	 */
 	if (lsn)
-		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
+		xfs_log_force(tp->t_mountp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
 }
--
cgit v1.1

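The conversion above is mechanical, but the pattern it establishes carries the rest of the series: every use of per-AG state is bracketed by a get/put pair instead of an open-coded mp->m_perag[agno] dereference. The standalone sketch below shows that shape. It is not kernel code; the structure and helper names are invented for illustration, and at this point in the series the real helpers can be assumed to be thin wrappers over the m_perag array, with the reference count only becoming meaningful in later patches.

/*
 * Illustration only: a toy userspace model of the get/put accessor
 * pattern.  None of these names are the real XFS ones.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef uint32_t agnumber_t;

struct perag {
	long	pagf_freeblks;	/* cached free-block count for this AG */
	int	refcount;	/* pin count; the real code grows this later */
};

struct mount {
	struct perag	*m_perag;	/* dense array, one entry per AG */
	agnumber_t	m_agcount;
};

static struct perag *perag_get(struct mount *mp, agnumber_t agno)
{
	assert(agno < mp->m_agcount);
	mp->m_perag[agno].refcount++;	/* pin the structure while in use */
	return &mp->m_perag[agno];
}

static void perag_put(struct perag *pag)
{
	assert(pag->refcount > 0);
	pag->refcount--;		/* drop the pin */
}

int main(void)
{
	struct mount mp = { calloc(4, sizeof(struct perag)), 4 };
	struct perag *pag;

	pag = perag_get(&mp, 2);	/* was: &mp.m_perag[2] */
	pag->pagf_freeblks += 128;	/* use the per-AG state */
	perag_put(pag);

	printf("ag 2: freeblks=%ld refcount=%d\n",
	       mp.m_perag[2].pagf_freeblks, mp.m_perag[2].refcount);
	free(mp.m_perag);
	return 0;
}

Because callers never see how the per-AG object was found, the next patch can swap the backing array for a radix tree without touching any of the call sites converted here.
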
From 1c1c6ebcf5284aee4910f3b906ac90c20e510c82 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 11 Jan 2010 11:47:44 +0000
Subject: xfs: Replace per-ag array with a radix tree

The use of an array for the per-ag structures requires reallocation
of the array when growing the filesystem. This requires locking
access to the array to avoid use after free situations, and the
locking is difficult to get right. To avoid needing to reallocate an
array, change the per-ag structures to an allocated object per ag and
index them using a tree structure.

The AGs are always densely indexed (hence the use of an array), but
the number supported is 2^32 and lookups tend to be random and hence
indexing needs to scale. A simple choice is a radix tree - it works
well with this sort of index. This change also removes another large
contiguous allocation from the mount/growfs path in XFS.

The growing process now needs to change to only initialise the new
AGs required for the extra space, and as such only needs to
exclusively lock the tree for inserts. The rest of the code only
needs to lock the tree while doing lookups, and hence this will
remove all the deadlocks that currently occur on the m_perag_lock as
it is now an innermost lock. The lock is also changed to a spinlock
from a read/write lock as the hold time is now extremely short.

To complete the picture, the per-ag structures will need to be
reference counted to ensure that we don't free/modify them while they
are still in use. This will be done in a subsequent patch.

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Signed-off-by: Alex Elder
---
 fs/xfs/xfs_alloc.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs/xfs/xfs_alloc.c')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 84070f2..4d66bb7 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2276,7 +2276,6 @@ xfs_alloc_vextent(
 		 * These three force us into a single a.g.
 		 */
 		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-		down_read(&mp->m_peraglock);
 		args->pag = xfs_perag_get(mp, args->agno);
 		args->minleft = 0;
 		error = xfs_alloc_fix_freelist(args, 0);
@@ -2286,14 +2285,12 @@ xfs_alloc_vextent(
 			goto error0;
 		}
 		if (!args->agbp) {
-			up_read(&mp->m_peraglock);
 			trace_xfs_alloc_vextent_noagbp(args);
 			break;
 		}
 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
 		if ((error = xfs_alloc_ag_vextent(args)))
 			goto error0;
-		up_read(&mp->m_peraglock);
 		break;
 	case XFS_ALLOCTYPE_START_BNO:
 		/*
@@ -2345,7 +2342,6 @@ xfs_alloc_vextent(
 		 * Loop over allocation groups twice; first time with
 		 * trylock set, second time without.
 		 */
-		down_read(&mp->m_peraglock);
 		for (;;) {
 			args->pag = xfs_perag_get(mp, args->agno);
 			if (no_min) args->minleft = 0;
 			error = xfs_alloc_fix_freelist(args, flags);
 			args->minleft = minleft;
@@ -2408,7 +2404,6 @@ xfs_alloc_vextent(
 			}
 			xfs_perag_put(args->pag);
 		}
-		up_read(&mp->m_peraglock);
 		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
 			if (args->agno == sagno)
 				mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2438,7 +2433,6 @@ xfs_alloc_vextent(
 	return 0;
 error0:
 	xfs_perag_put(args->pag);
-	up_read(&mp->m_peraglock);
 	return error;
 }
@@ -2463,7 +2457,6 @@ xfs_free_extent(
 	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
 	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
 	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
-	down_read(&args.mp->m_peraglock);
 	args.pag = xfs_perag_get(args.mp, args.agno);
 	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
 		goto error0;
@@ -2475,7 +2468,6 @@ xfs_free_extent(
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 error0:
 	xfs_perag_put(args.pag);
-	up_read(&args.mp->m_peraglock);
 	return error;
 }
--
cgit v1.1

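To make the new indexing concrete, here is a rough sketch of the lookup and insert paths the commit message describes. This is not the actual xfs_mount.c code: the structure and function names below are invented, and only the radix tree plus spinlock usage mirrors the description above, with mount/growfs taking the lock for inserts and everything else doing short lookups.

/* Illustration only; not the real XFS implementation. */
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct perag_sketch {
	unsigned long		agno;
};

struct mount_sketch {
	struct radix_tree_root	m_perag_tree;	/* agno -> per-AG object */
	spinlock_t		m_perag_lock;	/* innermost, held briefly */
};

static struct perag_sketch *
perag_lookup(struct mount_sketch *mp, unsigned long agno)
{
	struct perag_sketch	*pag;

	/* lookup side: short lock hold, no array to reallocate or protect */
	spin_lock(&mp->m_perag_lock);
	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
	spin_unlock(&mp->m_perag_lock);
	return pag;
}

static int
perag_insert(struct mount_sketch *mp, unsigned long agno)
{
	struct perag_sketch	*pag;
	int			error;

	/* mount/growfs side: allocate one object per new AG, then insert */
	pag = kzalloc(sizeof(*pag), GFP_KERNEL);
	if (!pag)
		return -ENOMEM;
	pag->agno = agno;

	error = radix_tree_preload(GFP_KERNEL);
	if (error) {
		kfree(pag);
		return error;
	}
	spin_lock(&mp->m_perag_lock);
	error = radix_tree_insert(&mp->m_perag_tree, agno, pag);
	spin_unlock(&mp->m_perag_lock);
	radix_tree_preload_end();
	if (error)
		kfree(pag);
	return error;
}

The sketch omits initialisation, teardown and the reference counting the series adds next; it is only meant to show why exclusive locking is now confined to inserts while growing the filesystem.
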
From e57336ff7fc7520bec7b3a7741043bdebaf622ea Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 11 Jan 2010 11:47:49 +0000
Subject: xfs: embed the pagb_list array in the perag structure

Now that the perag structure is allocated memory rather than held in
an array, we don't need to have the busy extent array external to the
structure. Embed it into the perag structure to avoid needing an
extra allocation when setting up.

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Signed-off-by: Alex Elder
---
 fs/xfs/xfs_alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/xfs/xfs_alloc.c')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4d66bb7..8aa181d 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2200,8 +2200,8 @@ xfs_alloc_read_agf(
 		pag->pagf_levels[XFS_BTNUM_CNTi] =
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
 		spin_lock_init(&pag->pagb_lock);
-		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
-					sizeof(xfs_perag_busy_t), KM_SLEEP);
+		pag->pagb_count = 0;
+		memset(pag->pagb_list, 0, sizeof(pag->pagb_list));
 		pag->pagf_init = 1;
 	}
 #ifdef DEBUG
--
cgit v1.1

From 0cadda1c5f194f98a05d252ff4385d86d2ed0862 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 19 Jan 2010 09:56:44 +0000
Subject: xfs: remove duplicate buffer flags

Currently we define aliases for the buffer flags in various
namespaces, which only adds confusion. Remove all but the XBF_ flags
to clean this up a bit.

Note that we still abuse XFS_B_ASYNC/XBF_ASYNC for some non-buffer
uses, but I'll clean that up later.

Signed-off-by: Christoph Hellwig
Reviewed-by: Dave Chinner
Signed-off-by: Alex Elder
---
 fs/xfs/xfs_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/xfs/xfs_alloc.c')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8aa181d..a27aeb7 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2180,7 +2180,7 @@ xfs_alloc_read_agf(
 	ASSERT(agno != NULLAGNUMBER);
 
 	error = xfs_read_agf(mp, tp, agno,
-			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0,
+			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
 			bpp);
 	if (error)
 		return error;
--
cgit v1.1

From a14a348bff2f99471a28e5928eb6801224c053d8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 19 Jan 2010 09:56:46 +0000
Subject: xfs: clean up xfs_log_force calling conventions

Remove the XFS_LOG_FORCE argument which was always set, and the
XFS_LOG_URGE define, which was never used.

Split xfs_log_force into two helpers - xfs_log_force, which forces
the whole log, and xfs_log_force_lsn, which forces up to the
specified LSN. The underlying implementations already were entirely
separate, as were the users.

Also re-indent the new _xfs_log_force/_xfs_log_force which previously
had a weird coding style.

Signed-off-by: Christoph Hellwig
Signed-off-by: Alex Elder
---
 fs/xfs/xfs_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/xfs/xfs_alloc.c')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a27aeb7..94cddbf 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2601,5 +2601,5 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	 * transaction that freed the block
 	 */
 	if (lsn)
-		xfs_log_force(tp->t_mountp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
+		xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC);
 }
--
cgit v1.1
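For a caller such as xfs_alloc_search_busy() above, the conversion is mechanical. A hedged before/after sketch of the call shape follows; the old three-argument form is taken from the hunk above, while the new two-argument xfs_log_force() form is inferred from the commit text rather than quoted from xfs_log.h.

/* Before: one entry point, behaviour selected by flag bits and lsn. */
xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC);	/* force up to lsn */

/* After: XFS_LOG_FORCE is gone and the two cases are separate helpers. */
xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC);	/* force up to lsn and wait */
xfs_log_force(mp, XFS_LOG_SYNC);		/* force the whole log (inferred form) */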