summaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c40
-rw-r--r--fs/xfs/libxfs/xfs_format.h62
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c6
-rw-r--r--fs/xfs/libxfs/xfs_sb.c12
-rw-r--r--fs/xfs/xfs_bmap_util.c31
-rw-r--r--fs/xfs/xfs_file.c84
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_inode.c128
-rw-r--r--fs/xfs/xfs_inode.h36
-rw-r--r--fs/xfs/xfs_ioctl.c5
-rw-r--r--fs/xfs/xfs_iomap.c3
-rw-r--r--fs/xfs/xfs_iops.c99
-rw-r--r--fs/xfs/xfs_linux.h9
-rw-r--r--fs/xfs/xfs_log_recover.c4
-rw-r--r--fs/xfs/xfs_mount.c918
-rw-r--r--fs/xfs/xfs_mount.h95
-rw-r--r--fs/xfs/xfs_pnfs.c4
-rw-r--r--fs/xfs/xfs_qm.c5
-rw-r--r--fs/xfs/xfs_super.c107
-rw-r--r--fs/xfs/xfs_super.h2
-rw-r--r--fs/xfs/xfs_trace.h3
-rw-r--r--fs/xfs/xfs_trans.c234
22 files changed, 634 insertions, 1273 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 60cfa90..8ae3775 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2215,9 +2215,8 @@ xfs_bmap_add_extent_delay_real(
diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
(bma->cur ? bma->cur->bc_private.b.allocated : 0));
if (diff > 0) {
- error = xfs_icsb_modify_counters(bma->ip->i_mount,
- XFS_SBS_FDBLOCKS,
- -((int64_t)diff), 0);
+ error = xfs_mod_fdblocks(bma->ip->i_mount,
+ -((int64_t)diff), false);
ASSERT(!error);
if (error)
goto done;
@@ -2268,9 +2267,8 @@ xfs_bmap_add_extent_delay_real(
temp += bma->cur->bc_private.b.allocated;
ASSERT(temp <= da_old);
if (temp < da_old)
- xfs_icsb_modify_counters(bma->ip->i_mount,
- XFS_SBS_FDBLOCKS,
- (int64_t)(da_old - temp), 0);
+ xfs_mod_fdblocks(bma->ip->i_mount,
+ (int64_t)(da_old - temp), false);
}
/* clear out the allocated field, done with it now in any case. */
@@ -2948,8 +2946,8 @@ xfs_bmap_add_extent_hole_delay(
}
if (oldlen != newlen) {
ASSERT(oldlen > newlen);
- xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
- (int64_t)(oldlen - newlen), 0);
+ xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
+ false);
/*
* Nothing to do for disk quota accounting here.
*/
@@ -4166,18 +4164,15 @@ xfs_bmapi_reserve_delalloc(
ASSERT(indlen > 0);
if (rt) {
- error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
- -((int64_t)extsz), 0);
+ error = xfs_mod_frextents(mp, -((int64_t)extsz));
} else {
- error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
- -((int64_t)alen), 0);
+ error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
}
if (error)
goto out_unreserve_quota;
- error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
- -((int64_t)indlen), 0);
+ error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
if (error)
goto out_unreserve_blocks;
@@ -4204,9 +4199,9 @@ xfs_bmapi_reserve_delalloc(
out_unreserve_blocks:
if (rt)
- xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+ xfs_mod_frextents(mp, extsz);
else
- xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+ xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
@@ -5019,10 +5014,8 @@ xfs_bmap_del_extent(
* Nothing to do for disk quota accounting here.
*/
ASSERT(da_old >= da_new);
- if (da_old > da_new) {
- xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
- (int64_t)(da_old - da_new), 0);
- }
+ if (da_old > da_new)
+ xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
done:
*logflagsp = flags;
return error;
@@ -5291,14 +5284,13 @@ xfs_bunmapi(
rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
do_div(rtexts, mp->m_sb.sb_rextsize);
- xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
- (int64_t)rtexts, 0);
+ xfs_mod_frextents(mp, (int64_t)rtexts);
(void)xfs_trans_reserve_quota_nblks(NULL,
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
- xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
- (int64_t)del.br_blockcount, 0);
+ xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
+ false);
(void)xfs_trans_reserve_quota_nblks(NULL,
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8eb7189..4daaa66 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
/* must be padded to 64 bit alignment */
} xfs_dsb_t;
-/*
- * Sequence number values for the fields.
- */
-typedef enum {
- XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
- XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
- XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
- XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
- XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
- XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
- XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
- XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
- XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
- XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
- XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
- XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
- XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
- XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
- XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
- XFS_SBS_PQUOTINO, XFS_SBS_LSN,
- XFS_SBS_FIELDCOUNT
-} xfs_sb_field_t;
-
-/*
- * Mask values, defined based on the xfs_sb_field_t values.
- * Only define the ones we're using.
- */
-#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x)
-#define XFS_SB_UUID XFS_SB_MVAL(UUID)
-#define XFS_SB_FNAME XFS_SB_MVAL(FNAME)
-#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO)
-#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO)
-#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO)
-#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM)
-#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO)
-#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO)
-#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS)
-#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
-#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
-#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
-#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
-#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
-#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
-#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \
- XFS_SB_MVAL(BAD_FEATURES2))
-#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
-#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
-#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
-#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
-#define XFS_SB_CRC XFS_SB_MVAL(CRC)
-#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
-#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
-#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
-#define XFS_SB_MOD_BITS \
- (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
- XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
- XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
- XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
- XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
- XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
- XFS_SB_PQUOTINO)
-
/*
* Misc. Flags - warning - these will be cleared by xfs_repair unless
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index db04448..07349a1 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
*/
newlen = args.mp->m_ialloc_inos;
if (args.mp->m_maxicount &&
- args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+ percpu_counter_read(&args.mp->m_icount) + newlen >
+ args.mp->m_maxicount)
return -ENOSPC;
args.minlen = args.maxlen = args.mp->m_ialloc_blks;
/*
@@ -1340,7 +1341,8 @@ xfs_dialloc(
* inode.
*/
if (mp->m_maxicount &&
- mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
+ percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
+ mp->m_maxicount) {
noroom = 1;
okalloc = 0;
}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index f3ea02b..dc4bfc5 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -735,17 +735,15 @@ xfs_initialize_perag_data(
btree += pag->pagf_btreeblks;
xfs_perag_put(pag);
}
- /*
- * Overwrite incore superblock counters with just-read data
- */
+
+ /* Overwrite incore superblock counters with just-read data */
spin_lock(&mp->m_sb_lock);
sbp->sb_ifree = ifree;
sbp->sb_icount = ialloc;
sbp->sb_fdblocks = bfree + bfreelst + btree;
spin_unlock(&mp->m_sb_lock);
- /* Fixup the per-cpu counters as well. */
- xfs_icsb_reinit_counters(mp);
+ xfs_reinit_percpu_counters(mp);
return 0;
}
@@ -763,6 +761,10 @@ xfs_log_sb(
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
+ mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+ mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+ mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+
xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 22a5dcb..7efa23e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1599,13 +1599,6 @@ xfs_swap_extent_flush(
/* Verify O_DIRECT for ftmp */
if (VFS_I(ip)->i_mapping->nrpages)
return -EINVAL;
-
- /*
- * Don't try to swap extents on mmap()d files because we can't lock
- * out races against page faults safely.
- */
- if (mapping_mapped(VFS_I(ip)->i_mapping))
- return -EBUSY;
return 0;
}
@@ -1633,13 +1626,14 @@ xfs_swap_extents(
}
/*
- * Lock up the inodes against other IO and truncate to begin with.
- * Then we can ensure the inodes are flushed and have no page cache
- * safely. Once we have done this we can take the ilocks and do the rest
- * of the checks.
+ * Lock the inodes against other IO, page faults and truncate to
+ * begin with. Then we can ensure the inodes are flushed and have no
+ * page cache safely. Once we have done this we can take the ilocks and
+ * do the rest of the checks.
*/
- lock_flags = XFS_IOLOCK_EXCL;
+ lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+ xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
/* Verify that both files have the same format */
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1666,8 +1660,16 @@ xfs_swap_extents(
xfs_trans_cancel(tp, 0);
goto out_unlock;
}
+
+ /*
+ * Lock and join the inodes to the transaction so that transaction commit
+ * or cancel will unlock the inodes from this point onwards.
+ */
xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
lock_flags |= XFS_ILOCK_EXCL;
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ijoin(tp, tip, lock_flags);
+
/* Verify all data are being swapped */
if (sxp->sx_offset != 0 ||
@@ -1720,9 +1722,6 @@ xfs_swap_extents(
goto out_trans_cancel;
}
- xfs_trans_ijoin(tp, ip, lock_flags);
- xfs_trans_ijoin(tp, tip, lock_flags);
-
/*
* Before we've swapped the forks, lets set the owners of the forks
* appropriately. We have to do this as we are demand paging the btree
@@ -1856,5 +1855,5 @@ out_unlock:
out_trans_cancel:
xfs_trans_cancel(tp, 0);
- goto out_unlock;
+ goto out;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d1..b101e80 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -397,7 +397,8 @@ STATIC int /* error (positive) */
xfs_zero_last_block(
struct xfs_inode *ip,
xfs_fsize_t offset,
- xfs_fsize_t isize)
+ xfs_fsize_t isize,
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +426,7 @@ xfs_zero_last_block(
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
+ *did_zeroing = true;
return xfs_iozero(ip, isize, zero_len);
}
@@ -443,7 +445,8 @@ int /* error (positive) */
xfs_zero_eof(
struct xfs_inode *ip,
xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize) /* current inode size */
+ xfs_fsize_t isize, /* current inode size */
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_zero_fsb;
@@ -465,7 +468,7 @@ xfs_zero_eof(
* We only zero a part of that block so it is handled specially.
*/
if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
- error = xfs_zero_last_block(ip, offset, isize);
+ error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
if (error)
return error;
}
@@ -525,6 +528,7 @@ xfs_zero_eof(
if (error)
return error;
+ *did_zeroing = true;
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
}
@@ -567,13 +571,15 @@ restart:
* having to redo all checks before.
*/
if (*pos > i_size_read(inode)) {
+ bool zero = false;
+
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
goto restart;
}
- error = xfs_zero_eof(ip, *pos, i_size_read(inode));
+ error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
if (error)
return error;
}
@@ -841,6 +847,9 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ iolock |= XFS_MMAPLOCK_EXCL;
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
error = xfs_free_file_space(ip, offset, len);
if (error)
@@ -991,20 +1000,6 @@ xfs_file_mmap(
}
/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
- struct vm_area_struct *vma,
- struct vm_fault *vmf)
-{
- return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
-/*
* This type is designed to indicate the type of offset we would like
* to search from page cache for xfs_seek_hole_data().
*/
@@ -1379,6 +1374,55 @@ xfs_file_llseek(
}
}
+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ * i_mmap_lock (XFS - truncate serialisation)
+ * page_lock (MM)
+ * i_lock (XFS - extent map serialisation)
+ */
+STATIC int
+xfs_filemap_fault(
+ struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
+ int error;
+
+ trace_xfs_filemap_fault(ip);
+
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ error = filemap_fault(vma, vmf);
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+ return error;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+STATIC int
+xfs_filemap_page_mkwrite(
+ struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
+ int error;
+
+ trace_xfs_filemap_page_mkwrite(ip);
+
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+ return error;
+}
+
const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
.read = new_sync_read,
@@ -1411,7 +1455,7 @@ const struct file_operations xfs_dir_file_operations = {
};
static const struct vm_operations_struct xfs_file_vm_ops = {
- .fault = filemap_fault,
+ .fault = xfs_filemap_fault,
.map_pages = filemap_map_pages,
- .page_mkwrite = xfs_vm_page_mkwrite,
+ .page_mkwrite = xfs_filemap_page_mkwrite,
};
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 74efe5b..cb7e8a2 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -637,12 +637,13 @@ xfs_fs_counts(
xfs_mount_t *mp,
xfs_fsop_counts_t *cnt)
{
- xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+ cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
+ cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
+ cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
+ XFS_ALLOC_SET_ASIDE(mp);
+
spin_lock(&mp->m_sb_lock);
- cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
cnt->freertx = mp->m_sb.sb_frextents;
- cnt->freeino = mp->m_sb.sb_ifree;
- cnt->allocino = mp->m_sb.sb_icount;
spin_unlock(&mp->m_sb_lock);
return 0;
}
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
* what to do. This means that the amount of free space can
* change while we do this, so we need to retry if we end up
* trying to reserve more space than is available.
- *
- * We also use the xfs_mod_incore_sb() interface so that we
- * don't have to care about whether per cpu counter are
- * enabled, disabled or even compiled in....
*/
retry:
spin_lock(&mp->m_sb_lock);
- xfs_icsb_sync_counters_locked(mp, 0);
/*
* If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
} else {
__int64_t free;
- free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+ free = percpu_counter_sum(&mp->m_fdblocks) -
+ XFS_ALLOC_SET_ASIDE(mp);
if (!free)
goto out; /* ENOSPC and fdblks_delta = 0 */
@@ -755,8 +752,7 @@ out:
* the extra reserve blocks from the reserve.....
*/
int error;
- error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
- fdblks_delta, 0);
+ error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
if (error == -ENOSPC)
goto retry;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ded129d..8394f6f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
}
/*
- * The xfs inode contains 2 locks: a multi-reader lock called the
- * i_iolock and a multi-reader lock called the i_lock. This routine
- * allows either or both of the locks to be obtained.
+ * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and
+ * the i_lock. This routine allows various combinations of the locks to be
+ * obtained.
*
- * The 2 locks should always be ordered so that the IO lock is
- * obtained first in order to prevent deadlock.
+ * The 3 locks should always be ordered so that the IO lock is obtained first,
+ * the mmap lock second and the ilock last in order to prevent deadlock.
*
- * ip -- the inode being locked
- * lock_flags -- this parameter indicates the inode's locks
- * to be locked. It can be:
- * XFS_IOLOCK_SHARED,
- * XFS_IOLOCK_EXCL,
- * XFS_ILOCK_SHARED,
- * XFS_ILOCK_EXCL,
- * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
- * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
- * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
- * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
+ * Basic locking order:
+ *
+ * i_iolock -> i_mmap_lock -> page_lock -> i_lock
+ *
+ * mmap_sem locking order:
+ *
+ * i_iolock -> page lock -> mmap_sem
+ * mmap_sem -> i_mmap_lock -> page_lock
+ *
+ * The difference in mmap_sem locking order means that we cannot hold the
+ * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
+ * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
+ * in get_user_pages() to map the user pages into the kernel address space for
+ * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
+ * page faults already hold the mmap_sem.
+ *
+ * Hence to serialise fully against both syscall and mmap based IO, we need to
+ * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
+ * taken in places where we need to invalidate the page cache in a race
+ * free manner (e.g. truncate, hole punch and other extent manipulation
+ * functions).
*/
void
xfs_ilock(
@@ -150,6 +160,8 @@ xfs_ilock(
*/
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
else if (lock_flags & XFS_IOLOCK_SHARED)
mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
+ if (lock_flags & XFS_MMAPLOCK_EXCL)
+ mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+ else if (lock_flags & XFS_MMAPLOCK_SHARED)
+ mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+
if (lock_flags & XFS_ILOCK_EXCL)
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
*/
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
if (!mrtryaccess(&ip->i_iolock))
goto out;
}
+
+ if (lock_flags & XFS_MMAPLOCK_EXCL) {
+ if (!mrtryupdate(&ip->i_mmaplock))
+ goto out_undo_iolock;
+ } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+ if (!mrtryaccess(&ip->i_mmaplock))
+ goto out_undo_iolock;
+ }
+
if (lock_flags & XFS_ILOCK_EXCL) {
if (!mrtryupdate(&ip->i_lock))
- goto out_undo_iolock;
+ goto out_undo_mmaplock;
} else if (lock_flags & XFS_ILOCK_SHARED) {
if (!mrtryaccess(&ip->i_lock))
- goto out_undo_iolock;
+ goto out_undo_mmaplock;
}
return 1;
- out_undo_iolock:
+out_undo_mmaplock:
+ if (lock_flags & XFS_MMAPLOCK_EXCL)
+ mrunlock_excl(&ip->i_mmaplock);
+ else if (lock_flags & XFS_MMAPLOCK_SHARED)
+ mrunlock_shared(&ip->i_mmaplock);
+out_undo_iolock:
if (lock_flags & XFS_IOLOCK_EXCL)
mrunlock_excl(&ip->i_iolock);
else if (lock_flags & XFS_IOLOCK_SHARED)
mrunlock_shared(&ip->i_iolock);
- out:
+out:
return 0;
}
@@ -244,6 +277,8 @@ xfs_iunlock(
*/
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
else if (lock_flags & XFS_IOLOCK_SHARED)
mrunlock_shared(&ip->i_iolock);
+ if (lock_flags & XFS_MMAPLOCK_EXCL)
+ mrunlock_excl(&ip->i_mmaplock);
+ else if (lock_flags & XFS_MMAPLOCK_SHARED)
+ mrunlock_shared(&ip->i_mmaplock);
+
if (lock_flags & XFS_ILOCK_EXCL)
mrunlock_excl(&ip->i_lock);
else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
xfs_inode_t *ip,
uint lock_flags)
{
- ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+ ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
+ ASSERT((lock_flags &
+ ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
if (lock_flags & XFS_ILOCK_EXCL)
mrdemote(&ip->i_lock);
+ if (lock_flags & XFS_MMAPLOCK_EXCL)
+ mrdemote(&ip->i_mmaplock);
if (lock_flags & XFS_IOLOCK_EXCL)
mrdemote(&ip->i_iolock);
@@ -294,6 +337,12 @@ xfs_isilocked(
return rwsem_is_locked(&ip->i_lock.mr_lock);
}
+ if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
+ if (!(lock_flags & XFS_MMAPLOCK_SHARED))
+ return !!ip->i_mmaplock.mr_writer;
+ return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+ }
+
if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
if (!(lock_flags & XFS_IOLOCK_SHARED))
return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
#endif
/*
- * Bump the subclass so xfs_lock_inodes() acquires each lock with
- * a different value
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
+ * value. This shouldn't be called for page fault locking, but we also need to
+ * ensure we don't overrun the number of lockdep subclasses for the iolock or
+ * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
*/
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
- if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+ if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+ ASSERT(subclass + XFS_LOCK_INUMORDER <
+ (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+ }
+
+ if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
+ ASSERT(subclass + XFS_LOCK_INUMORDER <
+ (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+ lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
+ XFS_MMAPLOCK_SHIFT;
+ }
+
if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
@@ -427,10 +489,10 @@ again:
}
/*
- * xfs_lock_two_inodes() can only be used to lock one type of lock
- * at a time - the iolock or the ilock, but not both at once. If
- * we lock both at once, lockdep will report false positives saying
- * we have violated locking orders.
+ * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
*/
void
xfs_lock_two_inodes(
@@ -442,8 +504,12 @@ xfs_lock_two_inodes(
int attempts = 0;
xfs_log_item_t *lp;
- if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
- ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
+ if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+ ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+ ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
+ ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+
ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8e82b41..8f22d20 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
+ mrlock_t i_mmaplock; /* inode mmap IO lock */
atomic_t i_pincount; /* inode pin count */
spinlock_t i_flags_lock; /* inode i_flags lock */
/* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
#define XFS_IOLOCK_SHARED (1<<1)
#define XFS_ILOCK_EXCL (1<<2)
#define XFS_ILOCK_SHARED (1<<3)
+#define XFS_MMAPLOCK_EXCL (1<<4)
+#define XFS_MMAPLOCK_SHARED (1<<5)
#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
- | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
+ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
+ | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
#define XFS_LOCK_FLAGS \
{ XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
{ XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
{ XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
- { XFS_ILOCK_SHARED, "ILOCK_SHARED" }
+ { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
+ { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
+ { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
/*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
#define XFS_IOLOCK_SHIFT 16
#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_SHIFT 20
+
#define XFS_ILOCK_SHIFT 24
#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
-#define XFS_IOLOCK_DEP_MASK 0x00ff0000
+#define XFS_IOLOCK_DEP_MASK 0x000f0000
+#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
#define XFS_ILOCK_DEP_MASK 0xff000000
-#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
+#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
+ XFS_MMAPLOCK_DEP_MASK | \
+ XFS_ILOCK_DEP_MASK)
-#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
-#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
+#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
+ >> XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
+ >> XFS_MMAPLOCK_SHIFT)
+#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
+ >> XFS_ILOCK_SHIFT)
/*
* For multiple groups support: if S_ISGID bit is set in the parent
@@ -384,10 +399,11 @@ enum xfs_prealloc_flags {
XFS_PREALLOC_INVISIBLE = (1 << 4),
};
-int xfs_update_prealloc_flags(struct xfs_inode *,
- enum xfs_prealloc_flags);
-int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
-int xfs_iozero(struct xfs_inode *, loff_t, size_t);
+int xfs_update_prealloc_flags(struct xfs_inode *ip,
+ enum xfs_prealloc_flags flags);
+int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_fsize_t isize, bool *did_zeroing);
+int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
/* from xfs_iops.c */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ac4feae..4ee44dd 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -631,7 +631,7 @@ xfs_ioc_space(
if (filp->f_flags & O_DSYNC)
flags |= XFS_PREALLOC_SYNC;
- if (ioflags & XFS_IO_INVIS)
+ if (ioflags & XFS_IO_INVIS)
flags |= XFS_PREALLOC_INVISIBLE;
error = mnt_want_write_file(filp);
@@ -643,6 +643,9 @@ xfs_ioc_space(
if (error)
goto out_unlock;
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ iolock |= XFS_MMAPLOCK_EXCL;
+
switch (bf->l_whence) {
case 0: /*SEEK_SET*/
break;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ccb1dd0..38e633b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
alloc_blocks);
- xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
- freesp = mp->m_sb.sb_fdblocks;
+ freesp = percpu_counter_read_positive(&mp->m_fdblocks);
if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
shift = 2;
if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9bcad71..015d6a3 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -756,6 +756,7 @@ xfs_setattr_size(
int error;
uint lock_flags = 0;
uint commit_flags = 0;
+ bool did_zeroing = false;
trace_xfs_setattr(ip);
@@ -770,6 +771,7 @@ xfs_setattr_size(
return error;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+ ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -799,20 +801,16 @@ xfs_setattr_size(
return error;
/*
- * Now we can make the changes. Before we join the inode to the
- * transaction, take care of the part of the truncation that must be
- * done without the inode lock. This needs to be done before joining
- * the inode to the transaction, because the inode cannot be unlocked
- * once it is a part of the transaction.
+ * File data changes must be complete before we start the transaction to
+ * modify the inode. This needs to be done before joining the inode to
+ * the transaction because the inode cannot be unlocked once it is a
+ * part of the transaction.
+ *
+ * Start with zeroing any data block beyond EOF that we may expose on
+ * file extension.
*/
if (newsize > oldsize) {
- /*
- * Do the first part of growing a file: zero any data in the
- * last block that is beyond the old EOF. We need to do this
- * before the inode is joined to the transaction to modify
- * i_size.
- */
- error = xfs_zero_eof(ip, newsize, oldsize);
+ error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
if (error)
return error;
}
@@ -822,75 +820,42 @@ xfs_setattr_size(
* any previous writes that are beyond the on disk EOF and the new
* EOF that have not been written out need to be written here. If we
* do not write the data out, we expose ourselves to the null files
- * problem.
- *
- * Only flush from the on disk size to the smaller of the in memory
- * file size or the new size as that's the range we really care about
- * here and prevents waiting for other data not within the range we
- * care about here.
+ * problem. Note that this includes any block zeroing we did above;
+ * otherwise those blocks may not be zeroed after a crash.
*/
- if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
+ if (newsize > ip->i_d.di_size &&
+ (oldsize != ip->i_d.di_size || did_zeroing)) {
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
return error;
}
- /*
- * Wait for all direct I/O to complete.
- */
+ /* Now wait for all direct I/O to complete. */
inode_dio_wait(inode);
/*
- * Do all the page cache truncate work outside the transaction context
- * as the "lock" order is page lock->log space reservation. i.e.
- * locking pages inside the transaction can ABBA deadlock with
- * writeback. We have to do the VFS inode size update before we truncate
- * the pagecache, however, to avoid racing with page faults beyond the
- * new EOF they are not serialised against truncate operations except by
- * page locks and size updates.
+ * We've already locked out new page faults, so now we can safely remove
+ * pages from the page cache knowing they won't get refaulted until we
+ * drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are
+ * complete. The truncate_setsize() call also cleans partial EOF page
+ * PTEs on extending truncates and hence ensures sub-page block size
+ * filesystems are correctly handled, too.
*
- * Hence we are in a situation where a truncate can fail with ENOMEM
- * from xfs_trans_reserve(), but having already truncated the in-memory
- * version of the file (i.e. made user visible changes). There's not
- * much we can do about this, except to hope that the caller sees ENOMEM
- * and retries the truncate operation.
+ * We have to do all the page cache truncate work outside the
+ * transaction context as the "lock" order is page lock->log space
+ * reservation as defined by extent allocation in the writeback path.
+ * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+ * having already truncated the in-memory version of the file (i.e. made
+ * user visible changes). There's not much we can do about this, except
+ * to hope that the caller sees ENOMEM and retries the truncate
+ * operation.
*/
error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
return error;
truncate_setsize(inode, newsize);
- /*
- * The "we can't serialise against page faults" pain gets worse.
- *
- * If the file is mapped then we have to clean the page at the old EOF
- * when extending the file. Extending the file can expose changes the
- * underlying page mapping (e.g. from beyond EOF to a hole or
- * unwritten), and so on the next attempt to write to that page we need
- * to remap it for write. i.e. we need .page_mkwrite() to be called.
- * Hence we need to clean the page to clean the pte and so a new write
- * fault will be triggered appropriately.
- *
- * If we do it before we change the inode size, then we can race with a
- * page fault that maps the page with exactly the same problem. If we do
- * it after we change the file size, then a new page fault can come in
- * and allocate space before we've run the rest of the truncate
- * transaction. That's kinda grotesque, but it's better than have data
- * over a hole, and so that's the lesser evil that has been chosen here.
- *
- * The real solution, however, is to have some mechanism for locking out
- * page faults while a truncate is in progress.
- */
- if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
- error = filemap_write_and_wait_range(
- VFS_I(ip)->i_mapping,
- round_down(oldsize, PAGE_CACHE_SIZE),
- round_up(oldsize, PAGE_CACHE_SIZE) - 1);
- if (error)
- return error;
- }
-
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error)
@@ -989,8 +954,12 @@ xfs_vn_setattr(
xfs_ilock(ip, iolock);
error = xfs_break_layouts(dentry->d_inode, &iolock);
- if (!error)
+ if (!error) {
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ iolock |= XFS_MMAPLOCK_EXCL;
+
error = xfs_setattr_size(ip, iattr);
+ }
xfs_iunlock(ip, iolock);
} else {
error = xfs_setattr_nonsize(ip, iattr, 0);
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index c31d2c2..7c7842c 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
#undef XFS_NATIVE_HOST
#endif
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
#define irix_sgid_inherit xfs_params.sgid_inherit.val
#define irix_symlink_mode xfs_params.symlink_mode.val
#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a5a945f..4f5784f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4463,10 +4463,10 @@ xlog_do_recover(
xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
ASSERT(xfs_sb_good_version(sbp));
+ xfs_reinit_percpu_counters(log->l_mp);
+
xfs_buf_relse(bp);
- /* We've re-read the superblock so re-initialize per-cpu counters */
- xfs_icsb_reinit_counters(log->l_mp);
xlog_recover_check_summary(log);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fa80e6..2ce7ee3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
#include "xfs_sysfs.h"
-#ifdef HAVE_PERCPU_SB
-STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
- int);
-STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
- int);
-STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-#else
-
-#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
-#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
-#endif
-
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
goto reread;
}
- /* Initialize per-cpu counters */
- xfs_icsb_reinit_counters(mp);
+ xfs_reinit_percpu_counters(mp);
/* no need to be quiet anymore, so reset the buf ops */
bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
return 0;
- xfs_icsb_sync_counters(mp, 0);
-
/*
* we don't need to do this if we are updating the superblock
* counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
return xfs_sync_sb(mp, true);
}
-/*
- * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
- * a delta to a specified field in the in-core superblock. Simply
- * switch on the field indicated and apply the delta to that field.
- * Fields are not allowed to dip below zero, so if the delta would
- * do this do not apply it and return EINVAL.
- *
- * The m_sb_lock must be held when this routine is called.
- */
-STATIC int
-xfs_mod_incore_sb_unlocked(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int64_t delta,
- int rsvd)
+int
+xfs_mod_icount(
+ struct xfs_mount *mp,
+ int64_t delta)
{
- int scounter; /* short counter for 32 bit fields */
- long long lcounter; /* long counter for 64 bit fields */
- long long res_used, rem;
-
- /*
- * With the in-core superblock spin lock held, switch
- * on the indicated field. Apply the delta to the
- * proper field. If the fields value would dip below
- * 0, then do not apply the delta and return EINVAL.
- */
- switch (field) {
- case XFS_SBS_ICOUNT:
- lcounter = (long long)mp->m_sb.sb_icount;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_icount = lcounter;
- return 0;
- case XFS_SBS_IFREE:
- lcounter = (long long)mp->m_sb.sb_ifree;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_ifree = lcounter;
- return 0;
- case XFS_SBS_FDBLOCKS:
- lcounter = (long long)
- mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
- res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
-
- if (delta > 0) { /* Putting blocks back */
- if (res_used > delta) {
- mp->m_resblks_avail += delta;
- } else {
- rem = delta - res_used;
- mp->m_resblks_avail = mp->m_resblks;
- lcounter += rem;
- }
- } else { /* Taking blocks away */
- lcounter += delta;
- if (lcounter >= 0) {
- mp->m_sb.sb_fdblocks = lcounter +
- XFS_ALLOC_SET_ASIDE(mp);
- return 0;
- }
-
- /*
- * We are out of blocks, use any available reserved
- * blocks if were allowed to.
- */
- if (!rsvd)
- return -ENOSPC;
-
- lcounter = (long long)mp->m_resblks_avail + delta;
- if (lcounter >= 0) {
- mp->m_resblks_avail = lcounter;
- return 0;
- }
- printk_once(KERN_WARNING
- "Filesystem \"%s\": reserve blocks depleted! "
- "Consider increasing reserve pool size.",
- mp->m_fsname);
- return -ENOSPC;
- }
-
- mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
- return 0;
- case XFS_SBS_FREXTENTS:
- lcounter = (long long)mp->m_sb.sb_frextents;
- lcounter += delta;
- if (lcounter < 0) {
- return -ENOSPC;
- }
- mp->m_sb.sb_frextents = lcounter;
- return 0;
- case XFS_SBS_DBLOCKS:
- lcounter = (long long)mp->m_sb.sb_dblocks;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_dblocks = lcounter;
- return 0;
- case XFS_SBS_AGCOUNT:
- scounter = mp->m_sb.sb_agcount;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_agcount = scounter;
- return 0;
- case XFS_SBS_IMAX_PCT:
- scounter = mp->m_sb.sb_imax_pct;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_imax_pct = scounter;
- return 0;
- case XFS_SBS_REXTSIZE:
- scounter = mp->m_sb.sb_rextsize;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rextsize = scounter;
- return 0;
- case XFS_SBS_RBMBLOCKS:
- scounter = mp->m_sb.sb_rbmblocks;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rbmblocks = scounter;
- return 0;
- case XFS_SBS_RBLOCKS:
- lcounter = (long long)mp->m_sb.sb_rblocks;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rblocks = lcounter;
- return 0;
- case XFS_SBS_REXTENTS:
- lcounter = (long long)mp->m_sb.sb_rextents;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rextents = lcounter;
- return 0;
- case XFS_SBS_REXTSLOG:
- scounter = mp->m_sb.sb_rextslog;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rextslog = scounter;
- return 0;
- default:
+ /* deltas are +/-64, hence the large batch size of 128. */
+ __percpu_counter_add(&mp->m_icount, delta, 128);
+ if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
ASSERT(0);
+ percpu_counter_add(&mp->m_icount, -delta);
return -EINVAL;
}
+ return 0;
}
-/*
- * xfs_mod_incore_sb() is used to change a field in the in-core
- * superblock structure by the specified delta. This modification
- * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked()
- * routine to do the work.
- */
int
-xfs_mod_incore_sb(
+xfs_mod_ifree(
struct xfs_mount *mp,
- xfs_sb_field_t field,
- int64_t delta,
- int rsvd)
+ int64_t delta)
{
- int status;
-
-#ifdef HAVE_PERCPU_SB
- ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-#endif
- spin_lock(&mp->m_sb_lock);
- status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
- spin_unlock(&mp->m_sb_lock);
-
- return status;
+ percpu_counter_add(&mp->m_ifree, delta);
+ if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
+ ASSERT(0);
+ percpu_counter_add(&mp->m_ifree, -delta);
+ return -EINVAL;
+ }
+ return 0;
}
-/*
- * Change more than one field in the in-core superblock structure at a time.
- *
- * The fields and changes to those fields are specified in the array of
- * xfs_mod_sb structures passed in. Either all of the specified deltas
- * will be applied or none of them will. If any modified field dips below 0,
- * then all modifications will be backed out and EINVAL will be returned.
- *
- * Note that this function may not be used for the superblock values that
- * are tracked with the in-memory per-cpu counters - a direct call to
- * xfs_icsb_modify_counters is required for these.
- */
int
-xfs_mod_incore_sb_batch(
+xfs_mod_fdblocks(
struct xfs_mount *mp,
- xfs_mod_sb_t *msb,
- uint nmsb,
- int rsvd)
+ int64_t delta,
+ bool rsvd)
{
- xfs_mod_sb_t *msbp;
- int error = 0;
+ int64_t lcounter;
+ long long res_used;
+ s32 batch;
+
+ if (delta > 0) {
+ /*
+ * If the reserve pool is depleted, put blocks back into it
+ * first. Most of the time the pool is full.
+ */
+ if (likely(mp->m_resblks == mp->m_resblks_avail)) {
+ percpu_counter_add(&mp->m_fdblocks, delta);
+ return 0;
+ }
+
+ spin_lock(&mp->m_sb_lock);
+ res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
+
+ if (res_used > delta) {
+ mp->m_resblks_avail += delta;
+ } else {
+ delta -= res_used;
+ mp->m_resblks_avail = mp->m_resblks;
+ percpu_counter_add(&mp->m_fdblocks, delta);
+ }
+ spin_unlock(&mp->m_sb_lock);
+ return 0;
+ }
/*
- * Loop through the array of mod structures and apply each individually.
- * If any fail, then back out all those which have already been applied.
- * Do all of this within the scope of the m_sb_lock so that all of the
- * changes will be atomic.
+ * Taking blocks away, need to be more accurate the closer we
+ * are to zero.
+ *
+ * batch size is set to a maximum of 1024 blocks - if we are
+ * allocating or freeing extents larger than this then we aren't
+ * going to be hammering the counter lock so a lock per update
+ * is not a problem.
+ *
+ * If the counter has a value of less than 2 * max batch size,
+ * then make everything serialise as we are real close to
+ * ENOSPC.
+ */
+#define __BATCH 1024
+ if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+ batch = 1;
+ else
+ batch = __BATCH;
+#undef __BATCH
+
+ __percpu_counter_add(&mp->m_fdblocks, delta, batch);
+ if (percpu_counter_compare(&mp->m_fdblocks,
+ XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+ /* we had space! */
+ return 0;
+ }
+
+ /*
+ * lock up the sb for dipping into reserves before releasing the space
+ * that took us to ENOSPC.
*/
spin_lock(&mp->m_sb_lock);
- for (msbp = msb; msbp < (msb + nmsb); msbp++) {
- ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
- msbp->msb_field > XFS_SBS_FDBLOCKS);
+ percpu_counter_add(&mp->m_fdblocks, -delta);
+ if (!rsvd)
+ goto fdblocks_enospc;
- error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
- msbp->msb_delta, rsvd);
- if (error)
- goto unwind;
+ lcounter = (long long)mp->m_resblks_avail + delta;
+ if (lcounter >= 0) {
+ mp->m_resblks_avail = lcounter;
+ spin_unlock(&mp->m_sb_lock);
+ return 0;
}
+ printk_once(KERN_WARNING
+ "Filesystem \"%s\": reserve blocks depleted! "
+ "Consider increasing reserve pool size.",
+ mp->m_fsname);
+fdblocks_enospc:
spin_unlock(&mp->m_sb_lock);
- return 0;
+ return -ENOSPC;
+}
-unwind:
- while (--msbp >= msb) {
- error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
- -msbp->msb_delta, rsvd);
- ASSERT(error == 0);
- }
+int
+xfs_mod_frextents(
+ struct xfs_mount *mp,
+ int64_t delta)
+{
+ int64_t lcounter;
+ int ret = 0;
+
+ spin_lock(&mp->m_sb_lock);
+ lcounter = mp->m_sb.sb_frextents + delta;
+ if (lcounter < 0)
+ ret = -ENOSPC;
+ else
+ mp->m_sb.sb_frextents = lcounter;
spin_unlock(&mp->m_sb_lock);
- return error;
+ return ret;
}
/*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
}
return 0;
}
-
-#ifdef HAVE_PERCPU_SB
-/*
- * Per-cpu incore superblock counters
- *
- * Simple concept, difficult implementation
- *
- * Basically, replace the incore superblock counters with a distributed per cpu
- * counter for contended fields (e.g. free block count).
- *
- * Difficulties arise in that the incore sb is used for ENOSPC checking, and
- * hence needs to be accurately read when we are running low on space. Hence
- * there is a method to enable and disable the per-cpu counters based on how
- * much "stuff" is available in them.
- *
- * Basically, a counter is enabled if there is enough free resource to justify
- * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
- * ENOSPC), then we disable the counters to synchronise all callers and
- * re-distribute the available resources.
- *
- * If, once we redistributed the available resources, we still get a failure,
- * we disable the per-cpu counter and go through the slow path.
- *
- * The slow path is the current xfs_mod_incore_sb() function. This means that
- * when we disable a per-cpu counter, we need to drain its resources back to
- * the global superblock. We do this after disabling the counter to prevent
- * more threads from queueing up on the counter.
- *
- * Essentially, this means that we still need a lock in the fast path to enable
- * synchronisation between the global counters and the per-cpu counters. This
- * is not a problem because the lock will be local to a CPU almost all the time
- * and have little contention except when we get to ENOSPC conditions.
- *
- * Basically, this lock becomes a barrier that enables us to lock out the fast
- * path while we do things like enabling and disabling counters and
- * synchronising the counters.
- *
- * Locking rules:
- *
- * 1. m_sb_lock before picking up per-cpu locks
- * 2. per-cpu locks always picked up via for_each_online_cpu() order
- * 3. accurate counter sync requires m_sb_lock + per cpu locks
- * 4. modifying per-cpu counters requires holding per-cpu lock
- * 5. modifying global counters requires holding m_sb_lock
- * 6. enabling or disabling a counter requires holding the m_sb_lock
- * and _none_ of the per-cpu locks.
- *
- * Disabled counters are only ever re-enabled by a balance operation
- * that results in more free resources per CPU than a given threshold.
- * To ensure counters don't remain disabled, they are rebalanced when
- * the global resource goes above a higher threshold (i.e. some hysteresis
- * is present to prevent thrashing).
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * hot-plug CPU notifier support.
- *
- * We need a notifier per filesystem as we need to be able to identify
- * the filesystem to balance the counters out. This is achieved by
- * having a notifier block embedded in the xfs_mount_t and doing pointer
- * magic to get the mount pointer from the notifier block address.
- */
-STATIC int
-xfs_icsb_cpu_notify(
- struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- xfs_icsb_cnts_t *cntp;
- xfs_mount_t *mp;
-
- mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
- cntp = (xfs_icsb_cnts_t *)
- per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- /* Easy Case - initialize the area and locks, and
- * then rebalance when online does everything else for us. */
- memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- xfs_icsb_lock(mp);
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
- xfs_icsb_unlock(mp);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /* Disable all the counters, then fold the dead cpu's
- * count into the total on the global superblock and
- * re-enable the counters. */
- xfs_icsb_lock(mp);
- spin_lock(&mp->m_sb_lock);
- xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
- xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
- xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
-
- mp->m_sb.sb_icount += cntp->icsb_icount;
- mp->m_sb.sb_ifree += cntp->icsb_ifree;
- mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
-
- memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-
- xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
- spin_unlock(&mp->m_sb_lock);
- xfs_icsb_unlock(mp);
- break;
- }
-
- return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int
-xfs_icsb_init_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
- if (mp->m_sb_cnts == NULL)
- return -ENOMEM;
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
- }
-
- mutex_init(&mp->m_icsb_mutex);
-
- /*
- * start with all counters disabled so that the
- * initial balance kicks us off correctly
- */
- mp->m_icsb_counters = -1;
-
-#ifdef CONFIG_HOTPLUG_CPU
- mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
- mp->m_icsb_notifier.priority = 0;
- register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
- return 0;
-}
-
-void
-xfs_icsb_reinit_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_lock(mp);
- /*
- * start with all counters disabled so that the
- * initial balance kicks us off correctly
- */
- mp->m_icsb_counters = -1;
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
- xfs_icsb_unlock(mp);
-}
-
-void
-xfs_icsb_destroy_counters(
- xfs_mount_t *mp)
-{
- if (mp->m_sb_cnts) {
- unregister_hotcpu_notifier(&mp->m_icsb_notifier);
- free_percpu(mp->m_sb_cnts);
- }
- mutex_destroy(&mp->m_icsb_mutex);
-}
-
-STATIC void
-xfs_icsb_lock_cntr(
- xfs_icsb_cnts_t *icsbp)
-{
- while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
- ndelay(1000);
- }
-}
-
-STATIC void
-xfs_icsb_unlock_cntr(
- xfs_icsb_cnts_t *icsbp)
-{
- clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
-}
-
-
-STATIC void
-xfs_icsb_lock_all_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- xfs_icsb_lock_cntr(cntp);
- }
-}
-
-STATIC void
-xfs_icsb_unlock_all_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- xfs_icsb_unlock_cntr(cntp);
- }
-}
-
-STATIC void
-xfs_icsb_count(
- xfs_mount_t *mp,
- xfs_icsb_cnts_t *cnt,
- int flags)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
-
- if (!(flags & XFS_ICSB_LAZY_COUNT))
- xfs_icsb_lock_all_counters(mp);
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- cnt->icsb_icount += cntp->icsb_icount;
- cnt->icsb_ifree += cntp->icsb_ifree;
- cnt->icsb_fdblocks += cntp->icsb_fdblocks;
- }
-
- if (!(flags & XFS_ICSB_LAZY_COUNT))
- xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC int
-xfs_icsb_counter_disabled(
- xfs_mount_t *mp,
- xfs_sb_field_t field)
-{
- ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
- return test_bit(field, &mp->m_icsb_counters);
-}
-
-STATIC void
-xfs_icsb_disable_counter(
- xfs_mount_t *mp,
- xfs_sb_field_t field)
-{
- xfs_icsb_cnts_t cnt;
-
- ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
- /*
- * If we are already disabled, then there is nothing to do
- * here. We check before locking all the counters to avoid
- * the expensive lock operation when being called in the
- * slow path and the counter is already disabled. This is
- * safe because the only time we set or clear this state is under
- * the m_icsb_mutex.
- */
- if (xfs_icsb_counter_disabled(mp, field))
- return;
-
- xfs_icsb_lock_all_counters(mp);
- if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
- /* drain back to superblock */
-
- xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
- switch(field) {
- case XFS_SBS_ICOUNT:
- mp->m_sb.sb_icount = cnt.icsb_icount;
- break;
- case XFS_SBS_IFREE:
- mp->m_sb.sb_ifree = cnt.icsb_ifree;
- break;
- case XFS_SBS_FDBLOCKS:
- mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
- break;
- default:
- BUG();
- }
- }
-
- xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC void
-xfs_icsb_enable_counter(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- uint64_t count,
- uint64_t resid)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
- xfs_icsb_lock_all_counters(mp);
- for_each_online_cpu(i) {
- cntp = per_cpu_ptr(mp->m_sb_cnts, i);
- switch (field) {
- case XFS_SBS_ICOUNT:
- cntp->icsb_icount = count + resid;
- break;
- case XFS_SBS_IFREE:
- cntp->icsb_ifree = count + resid;
- break;
- case XFS_SBS_FDBLOCKS:
- cntp->icsb_fdblocks = count + resid;
- break;
- default:
- BUG();
- break;
- }
- resid = 0;
- }
- clear_bit(field, &mp->m_icsb_counters);
- xfs_icsb_unlock_all_counters(mp);
-}
-
-void
-xfs_icsb_sync_counters_locked(
- xfs_mount_t *mp,
- int flags)
-{
- xfs_icsb_cnts_t cnt;
-
- xfs_icsb_count(mp, &cnt, flags);
-
- if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
- mp->m_sb.sb_icount = cnt.icsb_icount;
- if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
- mp->m_sb.sb_ifree = cnt.icsb_ifree;
- if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
- mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-}
-
-/*
- * Accurate update of per-cpu counters to incore superblock
- */
-void
-xfs_icsb_sync_counters(
- xfs_mount_t *mp,
- int flags)
-{
- spin_lock(&mp->m_sb_lock);
- xfs_icsb_sync_counters_locked(mp, flags);
- spin_unlock(&mp->m_sb_lock);
-}
-
-/*
- * Balance and enable/disable counters as necessary.
- *
- * Thresholds for re-enabling counters are somewhat magic. inode counts are
- * chosen to be the same number as single on disk allocation chunk per CPU, and
- * free blocks is something far enough zero that we aren't going thrash when we
- * get near ENOSPC. We also need to supply a minimum we require per cpu to
- * prevent looping endlessly when xfs_alloc_space asks for more than will
- * be distributed to a single CPU but each CPU has enough blocks to be
- * reenabled.
- *
- * Note that we can be called when counters are already disabled.
- * xfs_icsb_disable_counter() optimises the counter locking in this case to
- * prevent locking every per-cpu counter needlessly.
- */
-
-#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
- (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
-STATIC void
-xfs_icsb_balance_counter_locked(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int min_per_cpu)
-{
- uint64_t count, resid;
- int weight = num_online_cpus();
- uint64_t min = (uint64_t)min_per_cpu;
-
- /* disable counter and sync counter */
- xfs_icsb_disable_counter(mp, field);
-
- /* update counters - first CPU gets residual*/
- switch (field) {
- case XFS_SBS_ICOUNT:
- count = mp->m_sb.sb_icount;
- resid = do_div(count, weight);
- if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
- return;
- break;
- case XFS_SBS_IFREE:
- count = mp->m_sb.sb_ifree;
- resid = do_div(count, weight);
- if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
- return;
- break;
- case XFS_SBS_FDBLOCKS:
- count = mp->m_sb.sb_fdblocks;
- resid = do_div(count, weight);
- if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
- return;
- break;
- default:
- BUG();
- count = resid = 0; /* quiet, gcc */
- break;
- }
-
- xfs_icsb_enable_counter(mp, field, count, resid);
-}
-
-STATIC void
-xfs_icsb_balance_counter(
- xfs_mount_t *mp,
- xfs_sb_field_t fields,
- int min_per_cpu)
-{
- spin_lock(&mp->m_sb_lock);
- xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
- spin_unlock(&mp->m_sb_lock);
-}
-
-int
-xfs_icsb_modify_counters(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int64_t delta,
- int rsvd)
-{
- xfs_icsb_cnts_t *icsbp;
- long long lcounter; /* long counter for 64 bit fields */
- int ret = 0;
-
- might_sleep();
-again:
- preempt_disable();
- icsbp = this_cpu_ptr(mp->m_sb_cnts);
-
- /*
- * if the counter is disabled, go to slow path
- */
- if (unlikely(xfs_icsb_counter_disabled(mp, field)))
- goto slow_path;
- xfs_icsb_lock_cntr(icsbp);
- if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
- xfs_icsb_unlock_cntr(icsbp);
- goto slow_path;
- }
-
- switch (field) {
- case XFS_SBS_ICOUNT:
- lcounter = icsbp->icsb_icount;
- lcounter += delta;
- if (unlikely(lcounter < 0))
- goto balance_counter;
- icsbp->icsb_icount = lcounter;
- break;
-
- case XFS_SBS_IFREE:
- lcounter = icsbp->icsb_ifree;
- lcounter += delta;
- if (unlikely(lcounter < 0))
- goto balance_counter;
- icsbp->icsb_ifree = lcounter;
- break;
-
- case XFS_SBS_FDBLOCKS:
- BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
-
- lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
- lcounter += delta;
- if (unlikely(lcounter < 0))
- goto balance_counter;
- icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
- break;
- default:
- BUG();
- break;
- }
- xfs_icsb_unlock_cntr(icsbp);
- preempt_enable();
- return 0;
-
-slow_path:
- preempt_enable();
-
- /*
- * serialise with a mutex so we don't burn lots of cpu on
- * the superblock lock. We still need to hold the superblock
- * lock, however, when we modify the global structures.
- */
- xfs_icsb_lock(mp);
-
- /*
- * Now running atomically.
- *
- * If the counter is enabled, someone has beaten us to rebalancing.
- * Drop the lock and try again in the fast path....
- */
- if (!(xfs_icsb_counter_disabled(mp, field))) {
- xfs_icsb_unlock(mp);
- goto again;
- }
-
- /*
- * The counter is currently disabled. Because we are
- * running atomically here, we know a rebalance cannot
- * be in progress. Hence we can go straight to operating
- * on the global superblock. We do not call xfs_mod_incore_sb()
- * here even though we need to get the m_sb_lock. Doing so
- * will cause us to re-enter this function and deadlock.
- * Hence we get the m_sb_lock ourselves and then call
- * xfs_mod_incore_sb_unlocked() as the unlocked path operates
- * directly on the global counters.
- */
- spin_lock(&mp->m_sb_lock);
- ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
- spin_unlock(&mp->m_sb_lock);
-
- /*
- * Now that we've modified the global superblock, we
- * may be able to re-enable the distributed counters
- * (e.g. lots of space just got freed). After that
- * we are done.
- */
- if (ret != -ENOSPC)
- xfs_icsb_balance_counter(mp, field, 0);
- xfs_icsb_unlock(mp);
- return ret;
-
-balance_counter:
- xfs_icsb_unlock_cntr(icsbp);
- preempt_enable();
-
- /*
- * We may have multiple threads here if multiple per-cpu
- * counters run dry at the same time. This will mean we can
- * do more balances than strictly necessary but it is not
- * the common slowpath case.
- */
- xfs_icsb_lock(mp);
-
- /*
- * running atomically.
- *
- * This will leave the counter in the correct state for future
- * accesses. After the rebalance, we simply try again and our retry
- * will either succeed through the fast path or slow path without
- * another balance operation being required.
- */
- xfs_icsb_balance_counter(mp, field, delta);
- xfs_icsb_unlock(mp);
- goto again;
-}
-
-#endif
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0d8abd6..8c995a2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,8 +18,6 @@
#ifndef __XFS_MOUNT_H__
#define __XFS_MOUNT_H__
-#ifdef __KERNEL__
-
struct xlog;
struct xfs_inode;
struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
struct xfs_dir_ops;
struct xfs_da_geometry;
-#ifdef HAVE_PERCPU_SB
-
-/*
- * Valid per-cpu incore superblock counters. Note that if you add new counters,
- * you may need to define new counter disabled bit field descriptors as there
- * are more possible fields in the superblock that can fit in a bitfield on a
- * 32 bit platform. The XFS_SBS_* values for the current current counters just
- * fit.
- */
-typedef struct xfs_icsb_cnts {
- uint64_t icsb_fdblocks;
- uint64_t icsb_ifree;
- uint64_t icsb_icount;
- unsigned long icsb_flags;
-} xfs_icsb_cnts_t;
-
-#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
-
-#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
-
-extern int xfs_icsb_init_counters(struct xfs_mount *);
-extern void xfs_icsb_reinit_counters(struct xfs_mount *);
-extern void xfs_icsb_destroy_counters(struct xfs_mount *);
-extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
-extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
-extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
- int64_t, int);
-
-#else
-#define xfs_icsb_init_counters(mp) (0)
-#define xfs_icsb_destroy_counters(mp) do { } while (0)
-#define xfs_icsb_reinit_counters(mp) do { } while (0)
-#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
-#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
-#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
- xfs_mod_incore_sb(mp, field, delta, rsvd)
-#endif
-
/* dynamic preallocation free space thresholds, 5% down to 1% */
enum {
XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */
struct xfs_ail *m_ail; /* fs active log item list */
- xfs_sb_t m_sb; /* copy of fs superblock */
+
+ struct xfs_sb m_sb; /* copy of fs superblock */
spinlock_t m_sb_lock; /* sb counter lock */
+ struct percpu_counter m_icount; /* allocated inodes counter */
+ struct percpu_counter m_ifree; /* free inodes counter */
+ struct percpu_counter m_fdblocks; /* free block counter */
+
struct xfs_buf *m_sb_bp; /* buffer for superblock */
char *m_fsname; /* filesystem name */
int m_fsname_len; /* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
uint m_chsize; /* size of next field */
atomic_t m_active_trans; /* number trans frozen */
-#ifdef HAVE_PERCPU_SB
- xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
- unsigned long m_icsb_counters; /* disabled per-cpu counters */
- struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
- struct mutex m_icsb_mutex; /* balancer sync lock */
-#endif
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
struct delayed_work m_eofblocks_work; /* background eof blocks
@@ -301,35 +260,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
}
/*
- * Per-cpu superblock locking functions
- */
-#ifdef HAVE_PERCPU_SB
-static inline void
-xfs_icsb_lock(xfs_mount_t *mp)
-{
- mutex_lock(&mp->m_icsb_mutex);
-}
-
-static inline void
-xfs_icsb_unlock(xfs_mount_t *mp)
-{
- mutex_unlock(&mp->m_icsb_mutex);
-}
-#else
-#define xfs_icsb_lock(mp)
-#define xfs_icsb_unlock(mp)
-#endif
-
-/*
- * This structure is for use by the xfs_mod_incore_sb_batch() routine.
- * xfs_growfs can specify a few fields which are more than int limit
- */
-typedef struct xfs_mod_sb {
- xfs_sb_field_t msb_field; /* Field to modify, see below */
- int64_t msb_delta; /* Change to make to specified field */
-} xfs_mod_sb_t;
-
-/*
* Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection.
*/
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
xfs_agnumber_t *maxagi);
-
extern void xfs_unmountfs(xfs_mount_t *);
-extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
- uint, int);
+
+extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
+extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
+extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
+ bool reserved);
+extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
+
extern int xfs_mount_log_sb(xfs_mount_t *);
extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
extern int xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern void xfs_set_low_space_thresholds(struct xfs_mount *);
-#endif /* __KERNEL__ */
-
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 4b33ef1..365dd57 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -300,8 +300,10 @@ xfs_fs_commit_blocks(
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
- if (error)
+ if (error) {
+ xfs_trans_cancel(tp, 0);
goto out_drop_iolock;
+ }
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index c6b22e1..5538468 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -841,6 +841,11 @@ xfs_qm_reset_dqcounts(
*/
xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
"xfs_quotacheck");
+ /*
+ * Reset type in case we are reusing group quota file for
+ * project quotas or vice versa
+ */
+ ddq->d_flags = type;
ddq->d_bcount = 0;
ddq->d_icount = 0;
ddq->d_rtbcount = 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 58453e3..3ad0b17 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -966,6 +966,8 @@ xfs_fs_inode_init_once(
atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock);
+ mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+ "xfsino", ip->i_ino);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino);
}
@@ -1013,24 +1015,6 @@ xfs_free_fsname(
kfree(mp->m_logname);
}
-STATIC void
-xfs_fs_put_super(
- struct super_block *sb)
-{
- struct xfs_mount *mp = XFS_M(sb);
-
- xfs_notice(mp, "Unmounting Filesystem");
- xfs_filestream_unmount(mp);
- xfs_unmountfs(mp);
-
- xfs_freesb(mp);
- xfs_icsb_destroy_counters(mp);
- xfs_destroy_mount_workqueues(mp);
- xfs_close_devices(mp);
- xfs_free_fsname(mp);
- kfree(mp);
-}
-
STATIC int
xfs_fs_sync_fs(
struct super_block *sb,
@@ -1066,6 +1050,9 @@ xfs_fs_statfs(
xfs_sb_t *sbp = &mp->m_sb;
struct xfs_inode *ip = XFS_I(dentry->d_inode);
__uint64_t fakeinos, id;
+ __uint64_t icount;
+ __uint64_t ifree;
+ __uint64_t fdblocks;
xfs_extlen_t lsize;
__int64_t ffree;
@@ -1076,17 +1063,21 @@ xfs_fs_statfs(
statp->f_fsid.val[0] = (u32)id;
statp->f_fsid.val[1] = (u32)(id >> 32);
- xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+ icount = percpu_counter_sum(&mp->m_icount);
+ ifree = percpu_counter_sum(&mp->m_ifree);
+ fdblocks = percpu_counter_sum(&mp->m_fdblocks);
spin_lock(&mp->m_sb_lock);
statp->f_bsize = sbp->sb_blocksize;
lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
statp->f_blocks = sbp->sb_dblocks - lsize;
- statp->f_bfree = statp->f_bavail =
- sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+ spin_unlock(&mp->m_sb_lock);
+
+ statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+ statp->f_bavail = statp->f_bfree;
+
fakeinos = statp->f_bfree << sbp->sb_inopblog;
- statp->f_files =
- MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+ statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
if (mp->m_maxicount)
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
@@ -1098,10 +1089,9 @@ xfs_fs_statfs(
sbp->sb_icount);
/* make sure statp->f_ffree does not underflow */
- ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+ ffree = statp->f_files - (icount - ifree);
statp->f_ffree = max_t(__int64_t, ffree, 0);
- spin_unlock(&mp->m_sb_lock);
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
@@ -1382,6 +1372,51 @@ xfs_finish_flags(
return 0;
}
+/*
+ * Set up the three per-cpu counters embedded in the mount (m_icount,
+ * m_ifree and m_fdblocks), each starting from zero.
+ *
+ * Returns 0 on success or -ENOMEM if any counter cannot be initialised.
+ * Counters that were set up before the failure are destroyed again, so the
+ * caller never has to clean up after an error.
+ */
+static int
+xfs_init_percpu_counters(
+	struct xfs_mount	*mp)
+{
+	int		error;
+
+	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
+	if (error)
+		return -ENOMEM;	/* negative errno, same as the unwind path */
+
+	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
+	if (error)
+		goto free_icount;
+
+	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
+	if (error)
+		goto free_ifree;
+
+	return 0;
+
+free_ifree:
+	percpu_counter_destroy(&mp->m_ifree);
+free_icount:
+	percpu_counter_destroy(&mp->m_icount);
+	return -ENOMEM;
+}
+
+/*
+ * Load the per-cpu inode and free block counters with the values currently
+ * held in the in-core superblock copy (mp->m_sb).
+ */
+void
+xfs_reinit_percpu_counters(
+	struct xfs_mount	*mp)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+
+	percpu_counter_set(&mp->m_icount, sbp->sb_icount);
+	percpu_counter_set(&mp->m_ifree, sbp->sb_ifree);
+	percpu_counter_set(&mp->m_fdblocks, sbp->sb_fdblocks);
+}
+
+/*
+ * Release the per-cpu counters set up by xfs_init_percpu_counters(),
+ * tearing them down in the reverse order of their initialisation.
+ */
+static void
+xfs_destroy_percpu_counters(
+	struct xfs_mount	*mp)
+{
+	percpu_counter_destroy(&mp->m_fdblocks);
+	percpu_counter_destroy(&mp->m_ifree);
+	percpu_counter_destroy(&mp->m_icount);
+}
+
STATIC int
xfs_fs_fill_super(
struct super_block *sb,
@@ -1430,7 +1465,7 @@ xfs_fs_fill_super(
if (error)
goto out_close_devices;
- error = xfs_icsb_init_counters(mp);
+ error = xfs_init_percpu_counters(mp);
if (error)
goto out_destroy_workqueues;
@@ -1488,7 +1523,7 @@ xfs_fs_fill_super(
out_free_sb:
xfs_freesb(mp);
out_destroy_counters:
- xfs_icsb_destroy_counters(mp);
+ xfs_destroy_percpu_counters(mp);
out_destroy_workqueues:
xfs_destroy_mount_workqueues(mp);
out_close_devices:
@@ -1505,6 +1540,24 @@ out_destroy_workqueues:
goto out_free_sb;
}
+/*
+ * Tear down the XFS mount attached to @sb: log the unmount, unmount the
+ * filesystem, then release the remaining per-mount resources and finally
+ * free the xfs_mount itself. The resource release sequence (superblock
+ * buffer, per-cpu counters, workqueues, devices) follows the same order as
+ * the error unwind labels at the end of xfs_fs_fill_super().
+ */
+STATIC void
+xfs_fs_put_super(
+ struct super_block *sb)
+{
+ struct xfs_mount *mp = XFS_M(sb);
+
+ xfs_notice(mp, "Unmounting Filesystem");
+ xfs_filestream_unmount(mp);
+ xfs_unmountfs(mp);
+
+ xfs_freesb(mp);
+ xfs_destroy_percpu_counters(mp);
+ xfs_destroy_mount_workqueues(mp);
+ xfs_close_devices(mp);
+ xfs_free_fsname(mp);
+ kfree(mp);
+}
+
STATIC struct dentry *
xfs_fs_mount(
struct file_system_type *fs_type,
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 2b830c2..499058f 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
extern const struct xattr_handler *xfs_xattr_handlers[];
extern const struct quotactl_ops xfs_quotactl_operations;
+extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
+
#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 51372e3..b1e059b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -685,6 +685,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
+DEFINE_INODE_EVENT(xfs_filemap_fault);
+DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
+
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
TP_ARGS(ip, caller_ip),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index eb90cd5..220ef2c 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -173,7 +173,7 @@ xfs_trans_reserve(
uint rtextents)
{
int error = 0;
- int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+ bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
/* Mark this thread as being in a transaction */
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
* fail if the count would go below zero.
*/
if (blocks > 0) {
- error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
- -((int64_t)blocks), rsvd);
+ error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
if (error != 0) {
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
* fail if the count would go below zero.
*/
if (rtextents > 0) {
- error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
- -((int64_t)rtextents), rsvd);
+ error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
if (error) {
error = -ENOSPC;
goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
undo_blocks:
if (blocks > 0) {
- xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
- (int64_t)blocks, rsvd);
+ /* undo: give back the blocks taken by the reservation above */
+ xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd);
tp->t_blk_res = 0;
}
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
sizeof(sbp->sb_frextents) - 1);
}
+/*
+ * Apply a signed delta to an 8 bit superblock field. The sum is held in a
+ * signed 8 bit local; a negative result trips an assert and leaves the
+ * field untouched.
+ *
+ * Returns 0 on success, -EINVAL if the result is negative.
+ */
+STATIC int
+xfs_sb_mod8(
+	uint8_t			*field,
+	int8_t			delta)
+{
+	int8_t			result = *field;
+
+	result += delta;
+	if (result >= 0) {
+		*field = result;
+		return 0;
+	}
+
+	ASSERT(0);
+	return -EINVAL;
+}
+
+/*
+ * Apply a signed delta to a 32 bit superblock field. The sum is held in a
+ * signed 32 bit local; a negative result trips an assert and leaves the
+ * field untouched.
+ *
+ * Returns 0 on success, -EINVAL if the result is negative.
+ */
+STATIC int
+xfs_sb_mod32(
+	uint32_t		*field,
+	int32_t			delta)
+{
+	int32_t			result = *field;
+
+	result += delta;
+	if (result >= 0) {
+		*field = result;
+		return 0;
+	}
+
+	ASSERT(0);
+	return -EINVAL;
+}
+
+/*
+ * Apply a signed delta to a 64 bit superblock field. The sum is held in a
+ * signed 64 bit local; a negative result trips an assert and leaves the
+ * field untouched.
+ *
+ * Returns 0 on success, -EINVAL if the result is negative.
+ */
+STATIC int
+xfs_sb_mod64(
+	uint64_t		*field,
+	int64_t			delta)
+{
+	int64_t			result = *field;
+
+	result += delta;
+	if (result >= 0) {
+		*field = result;
+		return 0;
+	}
+
+	ASSERT(0);
+	return -EINVAL;
+}
+
/*
* xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
* and apply superblock counter changes to the in-core superblock. The
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
* applied to the in-core superblock. The idea is that that has already been
* done.
*
- * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
- * However, we have to ensure that we only modify each superblock field only
- * once because the application of the delta values may not be atomic. That can
- * lead to ENOSPC races occurring if we have two separate modifcations of the
- * free space counter to put back the entire reservation and then take away
- * what we used.
- *
* If we are not logging superblock counters, then the inode allocated/free and
* used block counts are not updated in the on disk superblock. In this case,
* XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
*/
void
xfs_trans_unreserve_and_mod_sb(
- xfs_trans_t *tp)
+ struct xfs_trans *tp)
{
- xfs_mod_sb_t msb[9]; /* If you add cases, add entries */
- xfs_mod_sb_t *msbp;
- xfs_mount_t *mp = tp->t_mountp;
- /* REFERENCED */
- int error;
- int rsvd;
- int64_t blkdelta = 0;
- int64_t rtxdelta = 0;
- int64_t idelta = 0;
- int64_t ifreedelta = 0;
-
- msbp = msb;
- rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+ struct xfs_mount *mp = tp->t_mountp;
+ bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+ int64_t blkdelta = 0;
+ int64_t rtxdelta = 0;
+ int64_t idelta = 0;
+ int64_t ifreedelta = 0;
+ int error;
/* calculate deltas */
if (tp->t_blk_res > 0)
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
/* apply the per-cpu counters */
if (blkdelta) {
- error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
- blkdelta, rsvd);
+ error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
if (error)
goto out;
}
if (idelta) {
- error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
- idelta, rsvd);
+ error = xfs_mod_icount(mp, idelta);
if (error)
goto out_undo_fdblocks;
}
if (ifreedelta) {
- error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
- ifreedelta, rsvd);
+ error = xfs_mod_ifree(mp, ifreedelta);
if (error)
goto out_undo_icount;
}
+ if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
+ return;
+
/* apply remaining deltas */
- if (rtxdelta != 0) {
- msbp->msb_field = XFS_SBS_FREXTENTS;
- msbp->msb_delta = rtxdelta;
- msbp++;
+ spin_lock(&mp->m_sb_lock);
+ if (rtxdelta) {
+ error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
+ if (error)
+ goto out_undo_ifree;
}
- if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
- if (tp->t_dblocks_delta != 0) {
- msbp->msb_field = XFS_SBS_DBLOCKS;
- msbp->msb_delta = tp->t_dblocks_delta;
- msbp++;
- }
- if (tp->t_agcount_delta != 0) {
- msbp->msb_field = XFS_SBS_AGCOUNT;
- msbp->msb_delta = tp->t_agcount_delta;
- msbp++;
- }
- if (tp->t_imaxpct_delta != 0) {
- msbp->msb_field = XFS_SBS_IMAX_PCT;
- msbp->msb_delta = tp->t_imaxpct_delta;
- msbp++;
- }
- if (tp->t_rextsize_delta != 0) {
- msbp->msb_field = XFS_SBS_REXTSIZE;
- msbp->msb_delta = tp->t_rextsize_delta;
- msbp++;
- }
- if (tp->t_rbmblocks_delta != 0) {
- msbp->msb_field = XFS_SBS_RBMBLOCKS;
- msbp->msb_delta = tp->t_rbmblocks_delta;
- msbp++;
- }
- if (tp->t_rblocks_delta != 0) {
- msbp->msb_field = XFS_SBS_RBLOCKS;
- msbp->msb_delta = tp->t_rblocks_delta;
- msbp++;
- }
- if (tp->t_rextents_delta != 0) {
- msbp->msb_field = XFS_SBS_REXTENTS;
- msbp->msb_delta = tp->t_rextents_delta;
- msbp++;
- }
- if (tp->t_rextslog_delta != 0) {
- msbp->msb_field = XFS_SBS_REXTSLOG;
- msbp->msb_delta = tp->t_rextslog_delta;
- msbp++;
- }
+ if (tp->t_dblocks_delta != 0) {
+ error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
+ if (error)
+ goto out_undo_frextents;
}
-
- /*
- * If we need to change anything, do it.
- */
- if (msbp > msb) {
- error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
- (uint)(msbp - msb), rsvd);
+ if (tp->t_agcount_delta != 0) {
+ error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
if (error)
- goto out_undo_ifreecount;
+ goto out_undo_dblocks;
}
-
+ if (tp->t_imaxpct_delta != 0) {
+ error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
+ if (error)
+ goto out_undo_agcount;
+ }
+ if (tp->t_rextsize_delta != 0) {
+ error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
+ tp->t_rextsize_delta);
+ if (error)
+ goto out_undo_imaxpct;
+ }
+ if (tp->t_rbmblocks_delta != 0) {
+ error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
+ tp->t_rbmblocks_delta);
+ if (error)
+ goto out_undo_rextsize;
+ }
+ if (tp->t_rblocks_delta != 0) {
+ error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
+ if (error)
+ goto out_undo_rbmblocks;
+ }
+ if (tp->t_rextents_delta != 0) {
+ error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
+ tp->t_rextents_delta);
+ if (error)
+ goto out_undo_rblocks;
+ }
+ if (tp->t_rextslog_delta != 0) {
+ error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
+ tp->t_rextslog_delta);
+ if (error)
+ goto out_undo_rextents;
+ }
+ spin_unlock(&mp->m_sb_lock);
return;
-out_undo_ifreecount:
+out_undo_rextents:
+ if (tp->t_rextents_delta)
+ xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
+out_undo_rblocks:
+ if (tp->t_rblocks_delta)
+ xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
+out_undo_rbmblocks:
+ if (tp->t_rbmblocks_delta)
+ xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
+out_undo_rextsize:
+ if (tp->t_rextsize_delta)
+ xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
+out_undo_imaxpct:
+ /* revert sb_imax_pct only if this transaction actually changed it */
+ if (tp->t_imaxpct_delta)
+ xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
+out_undo_agcount:
+ if (tp->t_agcount_delta)
+ xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
+out_undo_dblocks:
+ if (tp->t_dblocks_delta)
+ xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
+out_undo_frextents:
+ if (rtxdelta)
+ xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
+out_undo_ifree:
+ spin_unlock(&mp->m_sb_lock);
if (ifreedelta)
- xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
+ xfs_mod_ifree(mp, -ifreedelta);
out_undo_icount:
if (idelta)
- xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
+ xfs_mod_icount(mp, -idelta);
out_undo_fdblocks:
if (blkdelta)
- xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
+ xfs_mod_fdblocks(mp, -blkdelta, rsvd);
out:
ASSERT(error == 0);
return;
OpenPOWER on IntegriCloud