diff options
author | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
---|---|---|
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
commit | fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch) | |
tree | 22962a4387943edc841c72a4e636a068c66d58fd /fs/xfs/quota | |
download | ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz |
Initial import of modified Linux 2.6.28 tree
Original upstream URL:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'fs/xfs/quota')
-rw-r--r-- | fs/xfs/quota/xfs_dquot.c | 1591 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_dquot.h | 192 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_dquot_item.c | 676 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_dquot_item.h | 52 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm.c | 2833 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm.h | 210 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm_bhv.c | 260 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm_stats.c | 130 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm_stats.h | 53 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm_syscalls.c | 1476 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_quota_priv.h | 165 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_trans_dquot.c | 923 |
12 files changed, 8561 insertions, 0 deletions
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c new file mode 100644 index 0000000..f2705f2 --- /dev/null +++ b/fs/xfs/quota/xfs_dquot.c @@ -0,0 +1,1591 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_space.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" + + +/* + LOCK ORDER + + inode lock (ilock) + dquot hash-chain lock (hashlock) + xqm dquot freelist lock (freelistlock + mount's dquot list lock (mplistlock) + user dquot lock - lock ordering among dquots is based on the uid or gid + group dquot lock - similar to udquots. Between the two dquots, the udquot + has to be locked first. + pin lock - the dquot lock must be held to take this lock. + flush lock - ditto. +*/ + +STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *); + +#ifdef DEBUG +xfs_buftarg_t *xfs_dqerror_target; +int xfs_do_dqerror; +int xfs_dqreq_num; +int xfs_dqerror_mod = 33; +#endif + +/* + * Allocate and initialize a dquot. We don't always allocate fresh memory; + * we try to reclaim a free dquot if the number of incore dquots are above + * a threshold. + * The only field inside the core that gets initialized at this point + * is the d_id field. The idea is to fill in the entire q_core + * when we read in the on disk dquot. + */ +STATIC xfs_dquot_t * +xfs_qm_dqinit( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type) +{ + xfs_dquot_t *dqp; + boolean_t brandnewdquot; + + brandnewdquot = xfs_qm_dqalloc_incore(&dqp); + dqp->dq_flags = type; + dqp->q_core.d_id = cpu_to_be32(id); + dqp->q_mount = mp; + + /* + * No need to re-initialize these if this is a reclaimed dquot. + */ + if (brandnewdquot) { + dqp->dq_flnext = dqp->dq_flprev = dqp; + mutex_init(&dqp->q_qlock); + sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + +#ifdef XFS_DQUOT_TRACE + dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS); + xfs_dqtrace_entry(dqp, "DQINIT"); +#endif + } else { + /* + * Only the q_core portion was zeroed in dqreclaim_one(). + * So, we need to reset others. + */ + dqp->q_nrefs = 0; + dqp->q_blkno = 0; + dqp->MPL_NEXT = dqp->HL_NEXT = NULL; + dqp->HL_PREVP = dqp->MPL_PREVP = NULL; + dqp->q_bufoffset = 0; + dqp->q_fileoffset = 0; + dqp->q_transp = NULL; + dqp->q_gdquot = NULL; + dqp->q_res_bcount = 0; + dqp->q_res_icount = 0; + dqp->q_res_rtbcount = 0; + dqp->q_pincount = 0; + dqp->q_hash = NULL; + ASSERT(dqp->dq_flnext == dqp->dq_flprev); + +#ifdef XFS_DQUOT_TRACE + ASSERT(dqp->q_trace); + xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT"); +#endif + } + + /* + * log item gets initialized later + */ + return (dqp); +} + +/* + * This is called to free all the memory associated with a dquot + */ +void +xfs_qm_dqdestroy( + xfs_dquot_t *dqp) +{ + ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); + + mutex_destroy(&dqp->q_qlock); + sv_destroy(&dqp->q_pinwait); + +#ifdef XFS_DQUOT_TRACE + if (dqp->q_trace) + ktrace_free(dqp->q_trace); + dqp->q_trace = NULL; +#endif + kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); + atomic_dec(&xfs_Gqm->qm_totaldquots); +} + +/* + * This is what a 'fresh' dquot inside a dquot chunk looks like on disk. + */ +STATIC void +xfs_qm_dqinit_core( + xfs_dqid_t id, + uint type, + xfs_dqblk_t *d) +{ + /* + * Caller has zero'd the entire dquot 'chunk' already. + */ + d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); + d->dd_diskdq.d_version = XFS_DQUOT_VERSION; + d->dd_diskdq.d_id = cpu_to_be32(id); + d->dd_diskdq.d_flags = type; +} + + +#ifdef XFS_DQUOT_TRACE +/* + * Dquot tracing for debugging. + */ +/* ARGSUSED */ +void +__xfs_dqtrace_entry( + xfs_dquot_t *dqp, + char *func, + void *retaddr, + xfs_inode_t *ip) +{ + xfs_dquot_t *udqp = NULL; + xfs_ino_t ino = 0; + + ASSERT(dqp->q_trace); + if (ip) { + ino = ip->i_ino; + udqp = ip->i_udquot; + } + ktrace_enter(dqp->q_trace, + (void *)(__psint_t)DQUOT_KTRACE_ENTRY, + (void *)func, + (void *)(__psint_t)dqp->q_nrefs, + (void *)(__psint_t)dqp->dq_flags, + (void *)(__psint_t)dqp->q_res_bcount, + (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount), + (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount), + (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit), + (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit), + (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit), + (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit), + (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id), + (void *)(__psint_t)current_pid(), + (void *)(__psint_t)ino, + (void *)(__psint_t)retaddr, + (void *)(__psint_t)udqp); + return; +} +#endif + + +/* + * If default limits are in force, push them into the dquot now. + * We overwrite the dquot limits only if they are zero and this + * is not the root dquot. + */ +void +xfs_qm_adjust_dqlimits( + xfs_mount_t *mp, + xfs_disk_dquot_t *d) +{ + xfs_quotainfo_t *q = mp->m_quotainfo; + + ASSERT(d->d_id); + + if (q->qi_bsoftlimit && !d->d_blk_softlimit) + d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); + if (q->qi_bhardlimit && !d->d_blk_hardlimit) + d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); + if (q->qi_isoftlimit && !d->d_ino_softlimit) + d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); + if (q->qi_ihardlimit && !d->d_ino_hardlimit) + d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); + if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) + d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); + if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) + d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); +} + +/* + * Check the limits and timers of a dquot and start or reset timers + * if necessary. + * This gets called even when quota enforcement is OFF, which makes our + * life a little less complicated. (We just don't reject any quota + * reservations in that case, when enforcement is off). + * We also return 0 as the values of the timers in Q_GETQUOTA calls, when + * enforcement's off. + * In contrast, warnings are a little different in that they don't + * 'automatically' get started when limits get exceeded. They do + * get reset to zero, however, when we find the count to be under + * the soft limit (they are only ever set non-zero via userspace). + */ +void +xfs_qm_adjust_dqtimers( + xfs_mount_t *mp, + xfs_disk_dquot_t *d) +{ + ASSERT(d->d_id); + +#ifdef QUOTADEBUG + if (d->d_blk_hardlimit) + ASSERT(be64_to_cpu(d->d_blk_softlimit) <= + be64_to_cpu(d->d_blk_hardlimit)); + if (d->d_ino_hardlimit) + ASSERT(be64_to_cpu(d->d_ino_softlimit) <= + be64_to_cpu(d->d_ino_hardlimit)); + if (d->d_rtb_hardlimit) + ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= + be64_to_cpu(d->d_rtb_hardlimit)); +#endif + if (!d->d_btimer) { + if ((d->d_blk_softlimit && + (be64_to_cpu(d->d_bcount) >= + be64_to_cpu(d->d_blk_softlimit))) || + (d->d_blk_hardlimit && + (be64_to_cpu(d->d_bcount) >= + be64_to_cpu(d->d_blk_hardlimit)))) { + d->d_btimer = cpu_to_be32(get_seconds() + + XFS_QI_BTIMELIMIT(mp)); + } else { + d->d_bwarns = 0; + } + } else { + if ((!d->d_blk_softlimit || + (be64_to_cpu(d->d_bcount) < + be64_to_cpu(d->d_blk_softlimit))) && + (!d->d_blk_hardlimit || + (be64_to_cpu(d->d_bcount) < + be64_to_cpu(d->d_blk_hardlimit)))) { + d->d_btimer = 0; + } + } + + if (!d->d_itimer) { + if ((d->d_ino_softlimit && + (be64_to_cpu(d->d_icount) >= + be64_to_cpu(d->d_ino_softlimit))) || + (d->d_ino_hardlimit && + (be64_to_cpu(d->d_icount) >= + be64_to_cpu(d->d_ino_hardlimit)))) { + d->d_itimer = cpu_to_be32(get_seconds() + + XFS_QI_ITIMELIMIT(mp)); + } else { + d->d_iwarns = 0; + } + } else { + if ((!d->d_ino_softlimit || + (be64_to_cpu(d->d_icount) < + be64_to_cpu(d->d_ino_softlimit))) && + (!d->d_ino_hardlimit || + (be64_to_cpu(d->d_icount) < + be64_to_cpu(d->d_ino_hardlimit)))) { + d->d_itimer = 0; + } + } + + if (!d->d_rtbtimer) { + if ((d->d_rtb_softlimit && + (be64_to_cpu(d->d_rtbcount) >= + be64_to_cpu(d->d_rtb_softlimit))) || + (d->d_rtb_hardlimit && + (be64_to_cpu(d->d_rtbcount) >= + be64_to_cpu(d->d_rtb_hardlimit)))) { + d->d_rtbtimer = cpu_to_be32(get_seconds() + + XFS_QI_RTBTIMELIMIT(mp)); + } else { + d->d_rtbwarns = 0; + } + } else { + if ((!d->d_rtb_softlimit || + (be64_to_cpu(d->d_rtbcount) < + be64_to_cpu(d->d_rtb_softlimit))) && + (!d->d_rtb_hardlimit || + (be64_to_cpu(d->d_rtbcount) < + be64_to_cpu(d->d_rtb_hardlimit)))) { + d->d_rtbtimer = 0; + } + } +} + +/* + * initialize a buffer full of dquots and log the whole thing + */ +STATIC void +xfs_qm_init_dquot_blk( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + xfs_buf_t *bp) +{ + xfs_dqblk_t *d; + int curid, i; + + ASSERT(tp); + ASSERT(XFS_BUF_ISBUSY(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + + d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); + + /* + * ID of the first dquot in the block - id's are zero based. + */ + curid = id - (id % XFS_QM_DQPERBLK(mp)); + ASSERT(curid >= 0); + memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp))); + for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++) + xfs_qm_dqinit_core(curid, type, d); + xfs_trans_dquot_buf(tp, bp, + (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : + ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : + XFS_BLI_GDQUOT_BUF))); + xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1); +} + + + +/* + * Allocate a block and fill it with dquots. + * This is called when the bmapi finds a hole. + */ +STATIC int +xfs_qm_dqalloc( + xfs_trans_t **tpp, + xfs_mount_t *mp, + xfs_dquot_t *dqp, + xfs_inode_t *quotip, + xfs_fileoff_t offset_fsb, + xfs_buf_t **O_bpp) +{ + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + xfs_bmbt_irec_t map; + int nmaps, error, committed; + xfs_buf_t *bp; + xfs_trans_t *tp = *tpp; + + ASSERT(tp != NULL); + xfs_dqtrace_entry(dqp, "DQALLOC"); + + /* + * Initialize the bmap freelist prior to calling bmapi code. + */ + XFS_BMAP_INIT(&flist, &firstblock); + xfs_ilock(quotip, XFS_ILOCK_EXCL); + /* + * Return if this type of quotas is turned off while we didn't + * have an inode lock + */ + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { + xfs_iunlock(quotip, XFS_ILOCK_EXCL); + return (ESRCH); + } + + /* + * xfs_trans_commit normally decrements the vnode ref count + * when it unlocks the inode. Since we want to keep the quota + * inode around, we bump the vnode ref count now. + */ + IHOLD(quotip); + + xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); + nmaps = 1; + if ((error = xfs_bmapi(tp, quotip, + offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, + XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, + &firstblock, + XFS_QM_DQALLOC_SPACE_RES(mp), + &map, &nmaps, &flist, NULL))) { + goto error0; + } + ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); + ASSERT(nmaps == 1); + ASSERT((map.br_startblock != DELAYSTARTBLOCK) && + (map.br_startblock != HOLESTARTBLOCK)); + + /* + * Keep track of the blkno to save a lookup later + */ + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); + + /* now we can just get the buffer (there's nothing to read yet) */ + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, + dqp->q_blkno, + XFS_QI_DQCHUNKLEN(mp), + 0); + if (!bp || (error = XFS_BUF_GETERROR(bp))) + goto error1; + /* + * Make a chunk of dquots out of this buffer and log + * the entire thing. + */ + xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), + dqp->dq_flags & XFS_DQ_ALLTYPES, bp); + + /* + * xfs_bmap_finish() may commit the current transaction and + * start a second transaction if the freelist is not empty. + * + * Since we still want to modify this buffer, we need to + * ensure that the buffer is not released on commit of + * the first transaction and ensure the buffer is added to the + * second transaction. + * + * If there is only one transaction then don't stop the buffer + * from being released when it commits later on. + */ + + xfs_trans_bhold(tp, bp); + + if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { + goto error1; + } + + if (committed) { + tp = *tpp; + xfs_trans_bjoin(tp, bp); + } else { + xfs_trans_bhold_release(tp, bp); + } + + *O_bpp = bp; + return 0; + + error1: + xfs_bmap_cancel(&flist); + error0: + xfs_iunlock(quotip, XFS_ILOCK_EXCL); + + return (error); +} + +/* + * Maps a dquot to the buffer containing its on-disk version. + * This returns a ptr to the buffer containing the on-disk dquot + * in the bpp param, and a ptr to the on-disk dquot within that buffer + */ +STATIC int +xfs_qm_dqtobp( + xfs_trans_t **tpp, + xfs_dquot_t *dqp, + xfs_disk_dquot_t **O_ddpp, + xfs_buf_t **O_bpp, + uint flags) +{ + xfs_bmbt_irec_t map; + int nmaps, error; + xfs_buf_t *bp; + xfs_inode_t *quotip; + xfs_mount_t *mp; + xfs_disk_dquot_t *ddq; + xfs_dqid_t id; + boolean_t newdquot; + xfs_trans_t *tp = (tpp ? *tpp : NULL); + + mp = dqp->q_mount; + id = be32_to_cpu(dqp->q_core.d_id); + nmaps = 1; + newdquot = B_FALSE; + + /* + * If we don't know where the dquot lives, find out. + */ + if (dqp->q_blkno == (xfs_daddr_t) 0) { + /* We use the id as an index */ + dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp); + nmaps = 1; + quotip = XFS_DQ_TO_QIP(dqp); + xfs_ilock(quotip, XFS_ILOCK_SHARED); + /* + * Return if this type of quotas is turned off while we didn't + * have an inode lock + */ + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + return (ESRCH); + } + /* + * Find the block map; no allocations yet + */ + error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, + XFS_DQUOT_CLUSTER_SIZE_FSB, + XFS_BMAPI_METADATA, + NULL, 0, &map, &nmaps, NULL, NULL); + + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + if (error) + return (error); + ASSERT(nmaps == 1); + ASSERT(map.br_blockcount == 1); + + /* + * offset of dquot in the (fixed sized) dquot chunk. + */ + dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) * + sizeof(xfs_dqblk_t); + if (map.br_startblock == HOLESTARTBLOCK) { + /* + * We don't allocate unless we're asked to + */ + if (!(flags & XFS_QMOPT_DQALLOC)) + return (ENOENT); + + ASSERT(tp); + if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, + dqp->q_fileoffset, &bp))) + return (error); + tp = *tpp; + newdquot = B_TRUE; + } else { + /* + * store the blkno etc so that we don't have to do the + * mapping all the time + */ + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); + } + } + ASSERT(dqp->q_blkno != DELAYSTARTBLOCK); + ASSERT(dqp->q_blkno != HOLESTARTBLOCK); + + /* + * Read in the buffer, unless we've just done the allocation + * (in which case we already have the buf). + */ + if (! newdquot) { + xfs_dqtrace_entry(dqp, "DQTOBP READBUF"); + if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + dqp->q_blkno, + XFS_QI_DQCHUNKLEN(mp), + 0, &bp))) { + return (error); + } + if (error || !bp) + return XFS_ERROR(error); + } + ASSERT(XFS_BUF_ISBUSY(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + + /* + * calculate the location of the dquot inside the buffer. + */ + ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); + + /* + * A simple sanity check in case we got a corrupted dquot... + */ + if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, + flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), + "dqtobp")) { + if (!(flags & XFS_QMOPT_DQREPAIR)) { + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EIO); + } + XFS_BUF_BUSY(bp); /* We dirtied this */ + } + + *O_bpp = bp; + *O_ddpp = ddq; + + return (0); +} + + +/* + * Read in the ondisk dquot using dqtobp() then copy it to an incore version, + * and release the buffer immediately. + * + */ +/* ARGSUSED */ +STATIC int +xfs_qm_dqread( + xfs_trans_t **tpp, + xfs_dqid_t id, + xfs_dquot_t *dqp, /* dquot to get filled in */ + uint flags) +{ + xfs_disk_dquot_t *ddqp; + xfs_buf_t *bp; + int error; + xfs_trans_t *tp; + + ASSERT(tpp); + + /* + * get a pointer to the on-disk dquot and the buffer containing it + * dqp already knows its own type (GROUP/USER). + */ + xfs_dqtrace_entry(dqp, "DQREAD"); + if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { + return (error); + } + tp = *tpp; + + /* copy everything from disk dquot to the incore dquot */ + memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); + ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); + xfs_qm_dquot_logitem_init(dqp); + + /* + * Reservation counters are defined as reservation plus current usage + * to avoid having to add everytime. + */ + dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); + dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); + dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); + + /* Mark the buf so that this will stay incore a little longer */ + XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF); + + /* + * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) + * So we need to release with xfs_trans_brelse(). + * The strategy here is identical to that of inodes; we lock + * the dquot in xfs_qm_dqget() before making it accessible to + * others. This is because dquots, like inodes, need a good level of + * concurrency, and we don't want to take locks on the entire buffers + * for dquot accesses. + * Note also that the dquot buffer may even be dirty at this point, if + * this particular dquot was repaired. We still aren't afraid to + * brelse it because we have the changes incore. + */ + ASSERT(XFS_BUF_ISBUSY(bp)); + ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + xfs_trans_brelse(tp, bp); + + return (error); +} + + +/* + * allocate an incore dquot from the kernel heap, + * and fill its core with quota information kept on disk. + * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk + * if it wasn't already allocated. + */ +STATIC int +xfs_qm_idtodq( + xfs_mount_t *mp, + xfs_dqid_t id, /* gid or uid, depending on type */ + uint type, /* UDQUOT or GDQUOT */ + uint flags, /* DQALLOC, DQREPAIR */ + xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */ +{ + xfs_dquot_t *dqp; + int error; + xfs_trans_t *tp; + int cancelflags=0; + + dqp = xfs_qm_dqinit(mp, id, type); + tp = NULL; + if (flags & XFS_QMOPT_DQALLOC) { + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); + if ((error = xfs_trans_reserve(tp, + XFS_QM_DQALLOC_SPACE_RES(mp), + XFS_WRITE_LOG_RES(mp) + + BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 + + 128, + 0, + XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT))) { + cancelflags = 0; + goto error0; + } + cancelflags = XFS_TRANS_RELEASE_LOG_RES; + } + + /* + * Read it from disk; xfs_dqread() takes care of + * all the necessary initialization of dquot's fields (locks, etc) + */ + if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { + /* + * This can happen if quotas got turned off (ESRCH), + * or if the dquot didn't exist on disk and we ask to + * allocate (ENOENT). + */ + xfs_dqtrace_entry(dqp, "DQREAD FAIL"); + cancelflags |= XFS_TRANS_ABORT; + goto error0; + } + if (tp) { + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) + goto error1; + } + + *O_dqpp = dqp; + return (0); + + error0: + ASSERT(error); + if (tp) + xfs_trans_cancel(tp, cancelflags); + error1: + xfs_qm_dqdestroy(dqp); + *O_dqpp = NULL; + return (error); +} + +/* + * Lookup a dquot in the incore dquot hashtable. We keep two separate + * hashtables for user and group dquots; and, these are global tables + * inside the XQM, not per-filesystem tables. + * The hash chain must be locked by caller, and it is left locked + * on return. Returning dquot is locked. + */ +STATIC int +xfs_qm_dqlookup( + xfs_mount_t *mp, + xfs_dqid_t id, + xfs_dqhash_t *qh, + xfs_dquot_t **O_dqpp) +{ + xfs_dquot_t *dqp; + uint flist_locked; + xfs_dquot_t *d; + + ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); + + flist_locked = B_FALSE; + + /* + * Traverse the hashchain looking for a match + */ + for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) { + /* + * We already have the hashlock. We don't need the + * dqlock to look at the id field of the dquot, since the + * id can't be modified without the hashlock anyway. + */ + if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { + xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP"); + /* + * All in core dquots must be on the dqlist of mp + */ + ASSERT(dqp->MPL_PREVP != NULL); + + xfs_dqlock(dqp); + if (dqp->q_nrefs == 0) { + ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); + if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { + xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT"); + + /* + * We may have raced with dqreclaim_one() + * (and lost). So, flag that we don't + * want the dquot to be reclaimed. + */ + dqp->dq_flags |= XFS_DQ_WANT; + xfs_dqunlock(dqp); + xfs_qm_freelist_lock(xfs_Gqm); + xfs_dqlock(dqp); + dqp->dq_flags &= ~(XFS_DQ_WANT); + } + flist_locked = B_TRUE; + } + + /* + * id couldn't have changed; we had the hashlock all + * along + */ + ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); + + if (flist_locked) { + if (dqp->q_nrefs != 0) { + xfs_qm_freelist_unlock(xfs_Gqm); + flist_locked = B_FALSE; + } else { + /* + * take it off the freelist + */ + xfs_dqtrace_entry(dqp, + "DQLOOKUP: TAKEOFF FL"); + XQM_FREELIST_REMOVE(dqp); + /* xfs_qm_freelist_print(&(xfs_Gqm-> + qm_dqfreelist), + "after removal"); */ + } + } + + /* + * grab a reference + */ + XFS_DQHOLD(dqp); + + if (flist_locked) + xfs_qm_freelist_unlock(xfs_Gqm); + /* + * move the dquot to the front of the hashchain + */ + ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); + if (dqp->HL_PREVP != &qh->qh_next) { + xfs_dqtrace_entry(dqp, + "DQLOOKUP: HASH MOVETOFRONT"); + if ((d = dqp->HL_NEXT)) + d->HL_PREVP = dqp->HL_PREVP; + *(dqp->HL_PREVP) = d; + d = qh->qh_next; + d->HL_PREVP = &dqp->HL_NEXT; + dqp->HL_NEXT = d; + dqp->HL_PREVP = &qh->qh_next; + qh->qh_next = dqp; + } + xfs_dqtrace_entry(dqp, "LOOKUP END"); + *O_dqpp = dqp; + ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); + return (0); + } + } + + *O_dqpp = NULL; + ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); + return (1); +} + +/* + * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a + * a locked dquot, doing an allocation (if requested) as needed. + * When both an inode and an id are given, the inode's id takes precedence. + * That is, if the id changes while we don't hold the ilock inside this + * function, the new dquot is returned, not necessarily the one requested + * in the id argument. + */ +int +xfs_qm_dqget( + xfs_mount_t *mp, + xfs_inode_t *ip, /* locked inode (optional) */ + xfs_dqid_t id, /* uid/projid/gid depending on type */ + uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ + uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ + xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ +{ + xfs_dquot_t *dqp; + xfs_dqhash_t *h; + uint version; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || + (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || + (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { + return (ESRCH); + } + h = XFS_DQ_HASH(mp, id, type); + +#ifdef DEBUG + if (xfs_do_dqerror) { + if ((xfs_dqerror_target == mp->m_ddev_targp) && + (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { + cmn_err(CE_DEBUG, "Returning error in dqget"); + return (EIO); + } + } +#endif + + again: + +#ifdef DEBUG + ASSERT(type == XFS_DQ_USER || + type == XFS_DQ_PROJ || + type == XFS_DQ_GROUP); + if (ip) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (type == XFS_DQ_USER) + ASSERT(ip->i_udquot == NULL); + else + ASSERT(ip->i_gdquot == NULL); + } +#endif + XFS_DQ_HASH_LOCK(h); + + /* + * Look in the cache (hashtable). + * The chain is kept locked during lookup. + */ + if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { + XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); + /* + * The dquot was found, moved to the front of the chain, + * taken off the freelist if it was on it, and locked + * at this point. Just unlock the hashchain and return. + */ + ASSERT(*O_dqpp); + ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); + XFS_DQ_HASH_UNLOCK(h); + xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); + return (0); /* success */ + } + XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); + + /* + * Dquot cache miss. We don't want to keep the inode lock across + * a (potential) disk read. Also we don't want to deal with the lock + * ordering between quotainode and this inode. OTOH, dropping the inode + * lock here means dealing with a chown that can happen before + * we re-acquire the lock. + */ + if (ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * Save the hashchain version stamp, and unlock the chain, so that + * we don't keep the lock across a disk read + */ + version = h->qh_version; + XFS_DQ_HASH_UNLOCK(h); + + /* + * Allocate the dquot on the kernel heap, and read the ondisk + * portion off the disk. Also, do all the necessary initialization + * This can return ENOENT if dquot didn't exist on disk and we didn't + * ask it to allocate; ESRCH if quotas got turned off suddenly. + */ + if ((error = xfs_qm_idtodq(mp, id, type, + flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR| + XFS_QMOPT_DOWARN), + &dqp))) { + if (ip) + xfs_ilock(ip, XFS_ILOCK_EXCL); + return (error); + } + + /* + * See if this is mount code calling to look at the overall quota limits + * which are stored in the id == 0 user or group's dquot. + * Since we may not have done a quotacheck by this point, just return + * the dquot without attaching it to any hashtables, lists, etc, or even + * taking a reference. + * The caller must dqdestroy this once done. + */ + if (flags & XFS_QMOPT_DQSUSER) { + ASSERT(id == 0); + ASSERT(! ip); + goto dqret; + } + + /* + * Dquot lock comes after hashlock in the lock ordering + */ + if (ip) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (! XFS_IS_DQTYPE_ON(mp, type)) { + /* inode stays locked on return */ + xfs_qm_dqdestroy(dqp); + return XFS_ERROR(ESRCH); + } + /* + * A dquot could be attached to this inode by now, since + * we had dropped the ilock. + */ + if (type == XFS_DQ_USER) { + if (ip->i_udquot) { + xfs_qm_dqdestroy(dqp); + dqp = ip->i_udquot; + xfs_dqlock(dqp); + goto dqret; + } + } else { + if (ip->i_gdquot) { + xfs_qm_dqdestroy(dqp); + dqp = ip->i_gdquot; + xfs_dqlock(dqp); + goto dqret; + } + } + } + + /* + * Hashlock comes after ilock in lock order + */ + XFS_DQ_HASH_LOCK(h); + if (version != h->qh_version) { + xfs_dquot_t *tmpdqp; + /* + * Now, see if somebody else put the dquot in the + * hashtable before us. This can happen because we didn't + * keep the hashchain lock. We don't have to worry about + * lock order between the two dquots here since dqp isn't + * on any findable lists yet. + */ + if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { + /* + * Duplicate found. Just throw away the new dquot + * and start over. + */ + xfs_qm_dqput(tmpdqp); + XFS_DQ_HASH_UNLOCK(h); + xfs_qm_dqdestroy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); + goto again; + } + } + + /* + * Put the dquot at the beginning of the hash-chain and mp's list + * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. + */ + ASSERT(XFS_DQ_IS_HASH_LOCKED(h)); + dqp->q_hash = h; + XQM_HASHLIST_INSERT(h, dqp); + + /* + * Attach this dquot to this filesystem's list of all dquots, + * kept inside the mount structure in m_quotainfo field + */ + xfs_qm_mplist_lock(mp); + + /* + * We return a locked dquot to the caller, with a reference taken + */ + xfs_dqlock(dqp); + dqp->q_nrefs = 1; + + XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp); + + xfs_qm_mplist_unlock(mp); + XFS_DQ_HASH_UNLOCK(h); + dqret: + ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_dqtrace_entry(dqp, "DQGET DONE"); + *O_dqpp = dqp; + return (0); +} + + +/* + * Release a reference to the dquot (decrement ref-count) + * and unlock it. If there is a group quota attached to this + * dquot, carefully release that too without tripping over + * deadlocks'n'stuff. + */ +void +xfs_qm_dqput( + xfs_dquot_t *dqp) +{ + xfs_dquot_t *gdqp; + + ASSERT(dqp->q_nrefs > 0); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + xfs_dqtrace_entry(dqp, "DQPUT"); + + if (dqp->q_nrefs != 1) { + dqp->q_nrefs--; + xfs_dqunlock(dqp); + return; + } + + /* + * drop the dqlock and acquire the freelist and dqlock + * in the right order; but try to get it out-of-order first + */ + if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { + xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT"); + xfs_dqunlock(dqp); + xfs_qm_freelist_lock(xfs_Gqm); + xfs_dqlock(dqp); + } + + while (1) { + gdqp = NULL; + + /* We can't depend on nrefs being == 1 here */ + if (--dqp->q_nrefs == 0) { + xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST"); + /* + * insert at end of the freelist. + */ + XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp); + + /* + * If we just added a udquot to the freelist, then + * we want to release the gdquot reference that + * it (probably) has. Otherwise it'll keep the + * gdquot from getting reclaimed. + */ + if ((gdqp = dqp->q_gdquot)) { + /* + * Avoid a recursive dqput call + */ + xfs_dqlock(gdqp); + dqp->q_gdquot = NULL; + } + + /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist), + "@@@@@++ Free list (after append) @@@@@+"); + */ + } + xfs_dqunlock(dqp); + + /* + * If we had a group quota inside the user quota as a hint, + * release it now. + */ + if (! gdqp) + break; + dqp = gdqp; + } + xfs_qm_freelist_unlock(xfs_Gqm); +} + +/* + * Release a dquot. Flush it if dirty, then dqput() it. + * dquot must not be locked. + */ +void +xfs_qm_dqrele( + xfs_dquot_t *dqp) +{ + ASSERT(dqp); + xfs_dqtrace_entry(dqp, "DQRELE"); + + xfs_dqlock(dqp); + /* + * We don't care to flush it if the dquot is dirty here. + * That will create stutters that we want to avoid. + * Instead we do a delayed write when we try to reclaim + * a dirty dquot. Also xfs_sync will take part of the burden... + */ + xfs_qm_dqput(dqp); +} + + +/* + * Write a modified dquot to disk. + * The dquot must be locked and the flush lock too taken by caller. + * The flush lock will not be unlocked until the dquot reaches the disk, + * but the dquot is free to be unlocked and modified by the caller + * in the interim. Dquot is still locked on return. This behavior is + * identical to that of inodes. + */ +int +xfs_qm_dqflush( + xfs_dquot_t *dqp, + uint flags) +{ + xfs_mount_t *mp; + xfs_buf_t *bp; + xfs_disk_dquot_t *ddqp; + int error; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); + xfs_dqtrace_entry(dqp, "DQFLUSH"); + + /* + * If not dirty, nada. + */ + if (!XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqfunlock(dqp); + return (0); + } + + /* + * Cant flush a pinned dquot. Wait for it. + */ + xfs_qm_dqunpin_wait(dqp); + + /* + * This may have been unpinned because the filesystem is shutting + * down forcibly. If that's the case we must not write this dquot + * to disk, because the log record didn't make it to disk! + */ + if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) { + dqp->dq_flags &= ~(XFS_DQ_DIRTY); + xfs_dqfunlock(dqp); + return XFS_ERROR(EIO); + } + + /* + * Get the buffer containing the on-disk dquot + * We don't need a transaction envelope because we know that the + * the ondisk-dquot has already been allocated for. + */ + if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { + xfs_dqtrace_entry(dqp, "DQTOBP FAIL"); + ASSERT(error != ENOENT); + /* + * Quotas could have gotten turned off (ESRCH) + */ + xfs_dqfunlock(dqp); + return (error); + } + + if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), + 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { + xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); + return XFS_ERROR(EIO); + } + + /* This is the only portion of data that needs to persist */ + memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t)); + + /* + * Clear the dirty field and remember the flush lsn for later use. + */ + dqp->dq_flags &= ~(XFS_DQ_DIRTY); + mp = dqp->q_mount; + + /* lsn is 64 bits */ + spin_lock(&mp->m_ail_lock); + dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn; + spin_unlock(&mp->m_ail_lock); + + /* + * Attach an iodone routine so that we can remove this dquot from the + * AIL and release the flush lock once the dquot is synced to disk. + */ + xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *)) + xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item)); + /* + * If the buffer is pinned then push on the log so we won't + * get stuck waiting in the write for too long. + */ + if (XFS_BUF_ISPINNED(bp)) { + xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE"); + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + } + + if (flags & XFS_QMOPT_DELWRI) { + xfs_bdwrite(mp, bp); + } else if (flags & XFS_QMOPT_ASYNC) { + error = xfs_bawrite(mp, bp); + } else { + error = xfs_bwrite(mp, bp); + } + xfs_dqtrace_entry(dqp, "DQFLUSH END"); + /* + * dqp is still locked, but caller is free to unlock it now. + */ + return (error); + +} + +/* + * This is the dquot flushing I/O completion routine. It is called + * from interrupt level when the buffer containing the dquot is + * flushed to disk. It is responsible for removing the dquot logitem + * from the AIL if it has not been re-logged, and unlocking the dquot's + * flush lock. This behavior is very similar to that of inodes.. + */ +/*ARGSUSED*/ +STATIC void +xfs_qm_dqflush_done( + xfs_buf_t *bp, + xfs_dq_logitem_t *qip) +{ + xfs_dquot_t *dqp; + + dqp = qip->qli_dquot; + + /* + * We only want to pull the item from the AIL if its + * location in the log has not changed since we started the flush. + * Thus, we only bother if the dquot's lsn has + * not changed. First we check the lsn outside the lock + * since it's cheaper, and then we recheck while + * holding the lock before removing the dquot from the AIL. + */ + if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && + qip->qli_item.li_lsn == qip->qli_flush_lsn) { + + spin_lock(&dqp->q_mount->m_ail_lock); + /* + * xfs_trans_delete_ail() drops the AIL lock. + */ + if (qip->qli_item.li_lsn == qip->qli_flush_lsn) + xfs_trans_delete_ail(dqp->q_mount, + (xfs_log_item_t*)qip); + else + spin_unlock(&dqp->q_mount->m_ail_lock); + } + + /* + * Release the dq's flush lock since we're done with it. + */ + xfs_dqfunlock(dqp); +} + +int +xfs_qm_dqlock_nowait( + xfs_dquot_t *dqp) +{ + return mutex_trylock(&dqp->q_qlock); +} + +void +xfs_dqlock( + xfs_dquot_t *dqp) +{ + mutex_lock(&dqp->q_qlock); +} + +void +xfs_dqunlock( + xfs_dquot_t *dqp) +{ + mutex_unlock(&(dqp->q_qlock)); + if (dqp->q_logitem.qli_dquot == dqp) { + /* Once was dqp->q_mount, but might just have been cleared */ + xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp, + (xfs_log_item_t*)&(dqp->q_logitem)); + } +} + + +void +xfs_dqunlock_nonotify( + xfs_dquot_t *dqp) +{ + mutex_unlock(&(dqp->q_qlock)); +} + +void +xfs_dqlock2( + xfs_dquot_t *d1, + xfs_dquot_t *d2) +{ + if (d1 && d2) { + ASSERT(d1 != d2); + if (be32_to_cpu(d1->q_core.d_id) > + be32_to_cpu(d2->q_core.d_id)) { + xfs_dqlock(d2); + xfs_dqlock(d1); + } else { + xfs_dqlock(d1); + xfs_dqlock(d2); + } + } else { + if (d1) { + xfs_dqlock(d1); + } else if (d2) { + xfs_dqlock(d2); + } + } +} + + +/* + * Take a dquot out of the mount's dqlist as well as the hashlist. + * This is called via unmount as well as quotaoff, and the purge + * will always succeed unless there are soft (temp) references + * outstanding. + * + * This returns 0 if it was purged, 1 if it wasn't. It's not an error code + * that we're returning! XXXsup - not cool. + */ +/* ARGSUSED */ +int +xfs_qm_dqpurge( + xfs_dquot_t *dqp) +{ + xfs_dqhash_t *thishash; + xfs_mount_t *mp = dqp->q_mount; + + ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); + ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); + + xfs_dqlock(dqp); + /* + * We really can't afford to purge a dquot that is + * referenced, because these are hard refs. + * It shouldn't happen in general because we went thru _all_ inodes in + * dqrele_all_inodes before calling this and didn't let the mountlock go. + * However it is possible that we have dquots with temporary + * references that are not attached to an inode. e.g. see xfs_setattr(). + */ + if (dqp->q_nrefs != 0) { + xfs_dqunlock(dqp); + XFS_DQ_HASH_UNLOCK(dqp->q_hash); + return (1); + } + + ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); + + /* + * If we're turning off quotas, we have to make sure that, for + * example, we don't delete quota disk blocks while dquots are + * in the process of getting written to those disk blocks. + * This dquot might well be on AIL, and we can't leave it there + * if we're turning off quotas. Basically, we need this flush + * lock, and are willing to block on it. + */ + if (!xfs_dqflock_nowait(dqp)) { + /* + * Block on the flush lock after nudging dquot buffer, + * if it is incore. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + + /* + * XXXIf we're turning this type of quotas off, we don't care + * about the dirty metadata sitting in this dquot. OTOH, if + * we're unmounting, we do care, so we flush it and wait. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); + /* dqflush unlocks dqflock */ + /* + * Given that dqpurge is a very rare occurrence, it is OK + * that we're holding the hashlist and mplist locks + * across the disk write. But, ... XXXsup + * + * We don't care about getting disk errors here. We need + * to purge this dquot anyway, so we go ahead regardless. + */ + error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); + if (error) + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_qm_dqpurge: dquot %p flush failed", dqp); + xfs_dqflock(dqp); + } + ASSERT(dqp->q_pincount == 0); + ASSERT(XFS_FORCED_SHUTDOWN(mp) || + !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); + + thishash = dqp->q_hash; + XQM_HASHLIST_REMOVE(thishash, dqp); + XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp); + /* + * XXX Move this to the front of the freelist, if we can get the + * freelist lock. + */ + ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); + + dqp->q_mount = NULL; + dqp->q_hash = NULL; + dqp->dq_flags = XFS_DQ_INACTIVE; + memset(&dqp->q_core, 0, sizeof(dqp->q_core)); + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + XFS_DQ_HASH_UNLOCK(thishash); + return (0); +} + + +#ifdef QUOTADEBUG +void +xfs_qm_dqprint(xfs_dquot_t *dqp) +{ + cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); + cmn_err(CE_DEBUG, "---- dquotID = %d", + (int)be32_to_cpu(dqp->q_core.d_id)); + cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); + cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); + cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); + cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); + cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_blk_hardlimit), + (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); + cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_blk_softlimit), + (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); + cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_ino_hardlimit), + (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); + cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_ino_softlimit), + (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); + cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_bcount), + (int)be64_to_cpu(dqp->q_core.d_bcount)); + cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", + be64_to_cpu(dqp->q_core.d_icount), + (int)be64_to_cpu(dqp->q_core.d_icount)); + cmn_err(CE_DEBUG, "---- btimer = %d", + (int)be32_to_cpu(dqp->q_core.d_btimer)); + cmn_err(CE_DEBUG, "---- itimer = %d", + (int)be32_to_cpu(dqp->q_core.d_itimer)); + cmn_err(CE_DEBUG, "---------------------------"); +} +#endif + +/* + * Give the buffer a little push if it is incore and + * wait on the flush lock. + */ +void +xfs_qm_dqflock_pushbuf_wait( + xfs_dquot_t *dqp) +{ + xfs_buf_t *bp; + + /* + * Check to see if the dquot has been flushed delayed + * write. If so, grab its buffer and send it + * out immediately. We'll be able to acquire + * the flush lock when the I/O completes. + */ + bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, + XFS_QI_DQCHUNKLEN(dqp->q_mount), + XFS_INCORE_TRYLOCK); + if (bp != NULL) { + if (XFS_BUF_ISDELAYWRITE(bp)) { + int error; + if (XFS_BUF_ISPINNED(bp)) { + xfs_log_force(dqp->q_mount, + (xfs_lsn_t)0, + XFS_LOG_FORCE); + } + error = xfs_bawrite(dqp->q_mount, bp); + if (error) + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, + "xfs_qm_dqflock_pushbuf_wait: " + "pushbuf error %d on dqp %p, bp %p", + error, dqp, bp); + } else { + xfs_buf_relse(bp); + } + } + xfs_dqflock(dqp); +} diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h new file mode 100644 index 0000000..8958d0f --- /dev/null +++ b/fs/xfs/quota/xfs_dquot.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DQUOT_H__ +#define __XFS_DQUOT_H__ + +/* + * Dquots are structures that hold quota information about a user or a group, + * much like inodes are for files. In fact, dquots share many characteristics + * with inodes. However, dquots can also be a centralized resource, relative + * to a collection of inodes. In this respect, dquots share some characteristics + * of the superblock. + * XFS dquots exploit both those in its algorithms. They make every attempt + * to not be a bottleneck when quotas are on and have minimal impact, if any, + * when quotas are off. + */ + +/* + * The hash chain headers (hash buckets) + */ +typedef struct xfs_dqhash { + struct xfs_dquot *qh_next; + mutex_t qh_lock; + uint qh_version; /* ever increasing version */ + uint qh_nelems; /* number of dquots on the list */ +} xfs_dqhash_t; + +typedef struct xfs_dqlink { + struct xfs_dquot *ql_next; /* forward link */ + struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */ +} xfs_dqlink_t; + +struct xfs_mount; +struct xfs_trans; + +/* + * This is the marker which is designed to occupy the first few + * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers + * must come first. + * This serves as the marker ("sentinel") when we have to restart list + * iterations because of locking considerations. + */ +typedef struct xfs_dqmarker { + struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */ + struct xfs_dquot*dqm_flprev; + xfs_dqlink_t dqm_mplist; /* link to mount's list of dquots */ + xfs_dqlink_t dqm_hashlist; /* link to the hash chain */ + uint dqm_flags; /* various flags (XFS_DQ_*) */ +} xfs_dqmarker_t; + +/* + * The incore dquot structure + */ +typedef struct xfs_dquot { + xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */ + xfs_dqhash_t *q_hash; /* the hashchain header */ + struct xfs_mount*q_mount; /* filesystem this relates to */ + struct xfs_trans*q_transp; /* trans this belongs to currently */ + uint q_nrefs; /* # active refs from inodes */ + xfs_daddr_t q_blkno; /* blkno of dquot buffer */ + int q_bufoffset; /* off of dq in buffer (# dquots) */ + xfs_fileoff_t q_fileoffset; /* offset in quotas file */ + + struct xfs_dquot*q_gdquot; /* group dquot, hint only */ + xfs_disk_dquot_t q_core; /* actual usage & quotas */ + xfs_dq_logitem_t q_logitem; /* dquot log item */ + xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ + xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ + xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ + mutex_t q_qlock; /* quota lock */ + struct completion q_flush; /* flush completion queue */ + uint q_pincount; /* pin count for this dquot */ + sv_t q_pinwait; /* sync var for pinning */ +#ifdef XFS_DQUOT_TRACE + struct ktrace *q_trace; /* trace header structure */ +#endif +} xfs_dquot_t; + + +#define dq_flnext q_lists.dqm_flnext +#define dq_flprev q_lists.dqm_flprev +#define dq_mplist q_lists.dqm_mplist +#define dq_hashlist q_lists.dqm_hashlist +#define dq_flags q_lists.dqm_flags + +#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) + +#ifdef DEBUG +static inline int +XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) +{ + if (mutex_trylock(&dqp->q_qlock)) { + mutex_unlock(&dqp->q_qlock); + return 0; + } + return 1; +} +#endif + + +/* + * Manage the q_flush completion queue embedded in the dquot. This completion + * queue synchronizes processes attempting to flush the in-core dquot back to + * disk. + */ +static inline void xfs_dqflock(xfs_dquot_t *dqp) +{ + wait_for_completion(&dqp->q_flush); +} + +static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) +{ + return try_wait_for_completion(&dqp->q_flush); +} + +static inline void xfs_dqfunlock(xfs_dquot_t *dqp) +{ + complete(&dqp->q_flush); +} + +#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) +#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) +#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) +#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) +#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) +#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) +#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ + XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ + XFS_DQ_TO_QINF(dqp)->qi_gquotaip) + +#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ + (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ + (XFS_IS_OQUOTA_ON((d)->q_mount)))) + +#ifdef XFS_DQUOT_TRACE +/* + * Dquot Tracing stuff. + */ +#define DQUOT_TRACE_SIZE 64 +#define DQUOT_KTRACE_ENTRY 1 + +extern void __xfs_dqtrace_entry(xfs_dquot_t *dqp, char *func, + void *, xfs_inode_t *); +#define xfs_dqtrace_entry_ino(a,b,ip) \ + __xfs_dqtrace_entry((a), (b), (void*)__return_address, (ip)) +#define xfs_dqtrace_entry(a,b) \ + __xfs_dqtrace_entry((a), (b), (void*)__return_address, NULL) +#else +#define xfs_dqtrace_entry(a,b) +#define xfs_dqtrace_entry_ino(a,b,ip) +#endif + +#ifdef QUOTADEBUG +extern void xfs_qm_dqprint(xfs_dquot_t *); +#else +#define xfs_qm_dqprint(a) +#endif + +extern void xfs_qm_dqdestroy(xfs_dquot_t *); +extern int xfs_qm_dqflush(xfs_dquot_t *, uint); +extern int xfs_qm_dqpurge(xfs_dquot_t *); +extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); +extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); +extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); +extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, + xfs_disk_dquot_t *); +extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, + xfs_disk_dquot_t *); +extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, + xfs_dqid_t, uint, uint, xfs_dquot_t **); +extern void xfs_qm_dqput(xfs_dquot_t *); +extern void xfs_qm_dqrele(xfs_dquot_t *); +extern void xfs_dqlock(xfs_dquot_t *); +extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); +extern void xfs_dqunlock(xfs_dquot_t *); +extern void xfs_dqunlock_nonotify(xfs_dquot_t *); + +#endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c new file mode 100644 index 0000000..f028644 --- /dev/null +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -0,0 +1,676 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" + +/* + * returns the number of iovecs needed to log the given dquot item. + */ +/* ARGSUSED */ +STATIC uint +xfs_qm_dquot_logitem_size( + xfs_dq_logitem_t *logitem) +{ + /* + * we need only two iovecs, one for the format, one for the real thing + */ + return (2); +} + +/* + * fills in the vector of log iovecs for the given dquot log item. + */ +STATIC void +xfs_qm_dquot_logitem_format( + xfs_dq_logitem_t *logitem, + xfs_log_iovec_t *logvec) +{ + ASSERT(logitem); + ASSERT(logitem->qli_dquot); + + logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; + logvec->i_len = sizeof(xfs_dq_logformat_t); + XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT); + logvec++; + logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; + logvec->i_len = sizeof(xfs_disk_dquot_t); + XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT); + + ASSERT(2 == logitem->qli_item.li_desc->lid_size); + logitem->qli_format.qlf_size = 2; + +} + +/* + * Increment the pin count of the given dquot. + * This value is protected by pinlock spinlock in the xQM structure. + */ +STATIC void +xfs_qm_dquot_logitem_pin( + xfs_dq_logitem_t *logitem) +{ + xfs_dquot_t *dqp; + + dqp = logitem->qli_dquot; + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + dqp->q_pincount++; + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); +} + +/* + * Decrement the pin count of the given dquot, and wake up + * anyone in xfs_dqwait_unpin() if the count goes to 0. The + * dquot must have been previously pinned with a call to xfs_dqpin(). + */ +/* ARGSUSED */ +STATIC void +xfs_qm_dquot_logitem_unpin( + xfs_dq_logitem_t *logitem, + int stale) +{ + xfs_dquot_t *dqp; + + dqp = logitem->qli_dquot; + ASSERT(dqp->q_pincount > 0); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + dqp->q_pincount--; + if (dqp->q_pincount == 0) { + sv_broadcast(&dqp->q_pinwait); + } + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); +} + +/* ARGSUSED */ +STATIC void +xfs_qm_dquot_logitem_unpin_remove( + xfs_dq_logitem_t *logitem, + xfs_trans_t *tp) +{ + xfs_qm_dquot_logitem_unpin(logitem, 0); +} + +/* + * Given the logitem, this writes the corresponding dquot entry to disk + * asynchronously. This is called with the dquot entry securely locked; + * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot + * at the end. + */ +STATIC void +xfs_qm_dquot_logitem_push( + xfs_dq_logitem_t *logitem) +{ + xfs_dquot_t *dqp; + int error; + + dqp = logitem->qli_dquot; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); + + /* + * Since we were able to lock the dquot's flush lock and + * we found it on the AIL, the dquot must be dirty. This + * is because the dquot is removed from the AIL while still + * holding the flush lock in xfs_dqflush_done(). Thus, if + * we found it in the AIL and were able to obtain the flush + * lock without sleeping, then there must not have been + * anyone in the process of flushing the dquot. + */ + error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + if (error) + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, + "xfs_qm_dquot_logitem_push: push error %d on dqp %p", + error, dqp); + xfs_dqunlock(dqp); +} + +/*ARGSUSED*/ +STATIC xfs_lsn_t +xfs_qm_dquot_logitem_committed( + xfs_dq_logitem_t *l, + xfs_lsn_t lsn) +{ + /* + * We always re-log the entire dquot when it becomes dirty, + * so, the latest copy _is_ the only one that matters. + */ + return (lsn); +} + + +/* + * This is called to wait for the given dquot to be unpinned. + * Most of these pin/unpin routines are plagiarized from inode code. + */ +void +xfs_qm_dqunpin_wait( + xfs_dquot_t *dqp) +{ + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + if (dqp->q_pincount == 0) { + return; + } + + /* + * Give the log a push so we don't wait here too long. + */ + xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); + spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + if (dqp->q_pincount == 0) { + spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + return; + } + sv_wait(&(dqp->q_pinwait), PINOD, + &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s); +} + +/* + * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that + * the dquot is locked by us, but the flush lock isn't. So, here we are + * going to see if the relevant dquot buffer is incore, waiting on DELWRI. + * If so, we want to push it out to help us take this item off the AIL as soon + * as possible. + * + * We must not be holding the AIL lock at this point. Calling incore() to + * search the buffer cache can be a time consuming thing, and AIL lock is a + * spinlock. + */ +STATIC void +xfs_qm_dquot_logitem_pushbuf( + xfs_dq_logitem_t *qip) +{ + xfs_dquot_t *dqp; + xfs_mount_t *mp; + xfs_buf_t *bp; + uint dopush; + + dqp = qip->qli_dquot; + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + /* + * The qli_pushbuf_flag keeps others from + * trying to duplicate our effort. + */ + ASSERT(qip->qli_pushbuf_flag != 0); + ASSERT(qip->qli_push_owner == current_pid()); + + /* + * If flushlock isn't locked anymore, chances are that the + * inode flush completed and the inode was taken off the AIL. + * So, just get out. + */ + if (completion_done(&dqp->q_flush) || + ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { + qip->qli_pushbuf_flag = 0; + xfs_dqunlock(dqp); + return; + } + mp = dqp->q_mount; + bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, + XFS_QI_DQCHUNKLEN(mp), + XFS_INCORE_TRYLOCK); + if (bp != NULL) { + if (XFS_BUF_ISDELAYWRITE(bp)) { + dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && + !completion_done(&dqp->q_flush)); + qip->qli_pushbuf_flag = 0; + xfs_dqunlock(dqp); + + if (XFS_BUF_ISPINNED(bp)) { + xfs_log_force(mp, (xfs_lsn_t)0, + XFS_LOG_FORCE); + } + if (dopush) { + int error; +#ifdef XFSRACEDEBUG + delay_for_intr(); + delay(300); +#endif + error = xfs_bawrite(mp, bp); + if (error) + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", + error, qip, bp); + } else { + xfs_buf_relse(bp); + } + } else { + qip->qli_pushbuf_flag = 0; + xfs_dqunlock(dqp); + xfs_buf_relse(bp); + } + return; + } + + qip->qli_pushbuf_flag = 0; + xfs_dqunlock(dqp); +} + +/* + * This is called to attempt to lock the dquot associated with this + * dquot log item. Don't sleep on the dquot lock or the flush lock. + * If the flush lock is already held, indicating that the dquot has + * been or is in the process of being flushed, then see if we can + * find the dquot's buffer in the buffer cache without sleeping. If + * we can and it is marked delayed write, then we want to send it out. + * We delay doing so until the push routine, though, to avoid sleeping + * in any device strategy routines. + */ +STATIC uint +xfs_qm_dquot_logitem_trylock( + xfs_dq_logitem_t *qip) +{ + xfs_dquot_t *dqp; + uint retval; + + dqp = qip->qli_dquot; + if (dqp->q_pincount > 0) + return (XFS_ITEM_PINNED); + + if (! xfs_qm_dqlock_nowait(dqp)) + return (XFS_ITEM_LOCKED); + + retval = XFS_ITEM_SUCCESS; + if (!xfs_dqflock_nowait(dqp)) { + /* + * The dquot is already being flushed. It may have been + * flushed delayed write, however, and we don't want to + * get stuck waiting for that to complete. So, we want to check + * to see if we can lock the dquot's buffer without sleeping. + * If we can and it is marked for delayed write, then we + * hold it and send it out from the push routine. We don't + * want to do that now since we might sleep in the device + * strategy routine. We also don't want to grab the buffer lock + * here because we'd like not to call into the buffer cache + * while holding the AIL lock. + * Make sure to only return PUSHBUF if we set pushbuf_flag + * ourselves. If someone else is doing it then we don't + * want to go to the push routine and duplicate their efforts. + */ + if (qip->qli_pushbuf_flag == 0) { + qip->qli_pushbuf_flag = 1; + ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno); +#ifdef DEBUG + qip->qli_push_owner = current_pid(); +#endif + /* + * The dquot is left locked. + */ + retval = XFS_ITEM_PUSHBUF; + } else { + retval = XFS_ITEM_FLUSHING; + xfs_dqunlock_nonotify(dqp); + } + } + + ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); + return (retval); +} + + +/* + * Unlock the dquot associated with the log item. + * Clear the fields of the dquot and dquot log item that + * are specific to the current transaction. If the + * hold flags is set, do not unlock the dquot. + */ +STATIC void +xfs_qm_dquot_logitem_unlock( + xfs_dq_logitem_t *ql) +{ + xfs_dquot_t *dqp; + + ASSERT(ql != NULL); + dqp = ql->qli_dquot; + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + /* + * Clear the transaction pointer in the dquot + */ + dqp->q_transp = NULL; + + /* + * dquots are never 'held' from getting unlocked at the end of + * a transaction. Their locking and unlocking is hidden inside the + * transaction layer, within trans_commit. Hence, no LI_HOLD flag + * for the logitem. + */ + xfs_dqunlock(dqp); +} + + +/* + * this needs to stamp an lsn into the dquot, I think. + * rpc's that look at user dquot's would then have to + * push on the dependency recorded in the dquot + */ +/* ARGSUSED */ +STATIC void +xfs_qm_dquot_logitem_committing( + xfs_dq_logitem_t *l, + xfs_lsn_t lsn) +{ + return; +} + + +/* + * This is the ops vector for dquots + */ +static struct xfs_item_ops xfs_dquot_item_ops = { + .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, + .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) + xfs_qm_dquot_logitem_format, + .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, + .iop_unpin = (void(*)(xfs_log_item_t*, int)) + xfs_qm_dquot_logitem_unpin, + .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) + xfs_qm_dquot_logitem_unpin_remove, + .iop_trylock = (uint(*)(xfs_log_item_t*)) + xfs_qm_dquot_logitem_trylock, + .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock, + .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) + xfs_qm_dquot_logitem_committed, + .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push, + .iop_pushbuf = (void(*)(xfs_log_item_t*)) + xfs_qm_dquot_logitem_pushbuf, + .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) + xfs_qm_dquot_logitem_committing +}; + +/* + * Initialize the dquot log item for a newly allocated dquot. + * The dquot isn't locked at this point, but it isn't on any of the lists + * either, so we don't care. + */ +void +xfs_qm_dquot_logitem_init( + struct xfs_dquot *dqp) +{ + xfs_dq_logitem_t *lp; + lp = &dqp->q_logitem; + + lp->qli_item.li_type = XFS_LI_DQUOT; + lp->qli_item.li_ops = &xfs_dquot_item_ops; + lp->qli_item.li_mountp = dqp->q_mount; + lp->qli_dquot = dqp; + lp->qli_format.qlf_type = XFS_LI_DQUOT; + lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); + lp->qli_format.qlf_blkno = dqp->q_blkno; + lp->qli_format.qlf_len = 1; + /* + * This is just the offset of this dquot within its buffer + * (which is currently 1 FSB and probably won't change). + * Hence 32 bits for this offset should be just fine. + * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) + * here, and recompute it at recovery time. + */ + lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; +} + +/*------------------ QUOTAOFF LOG ITEMS -------------------*/ + +/* + * This returns the number of iovecs needed to log the given quotaoff item. + * We only need 1 iovec for an quotaoff item. It just logs the + * quotaoff_log_format structure. + */ +/*ARGSUSED*/ +STATIC uint +xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) +{ + return (1); +} + +/* + * This is called to fill in the vector of log iovecs for the + * given quotaoff log item. We use only 1 iovec, and we point that + * at the quotaoff_log_format structure embedded in the quotaoff item. + * It is at this point that we assert that all of the extent + * slots in the quotaoff item have been filled. + */ +STATIC void +xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, + xfs_log_iovec_t *log_vector) +{ + ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF); + + log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); + log_vector->i_len = sizeof(xfs_qoff_logitem_t); + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); + qf->qql_format.qf_size = 1; +} + + +/* + * Pinning has no meaning for an quotaoff item, so just return. + */ +/*ARGSUSED*/ +STATIC void +xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf) +{ + return; +} + + +/* + * Since pinning has no meaning for an quotaoff item, unpinning does + * not either. + */ +/*ARGSUSED*/ +STATIC void +xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale) +{ + return; +} + +/*ARGSUSED*/ +STATIC void +xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp) +{ + return; +} + +/* + * Quotaoff items have no locking, so just return success. + */ +/*ARGSUSED*/ +STATIC uint +xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) +{ + return XFS_ITEM_LOCKED; +} + +/* + * Quotaoff items have no locking or pushing, so return failure + * so that the caller doesn't bother with us. + */ +/*ARGSUSED*/ +STATIC void +xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf) +{ + return; +} + +/* + * The quotaoff-start-item is logged only once and cannot be moved in the log, + * so simply return the lsn at which it's been logged. + */ +/*ARGSUSED*/ +STATIC xfs_lsn_t +xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn) +{ + return (lsn); +} + +/* + * There isn't much you can do to push on an quotaoff item. It is simply + * stuck waiting for the log to be flushed to disk. + */ +/*ARGSUSED*/ +STATIC void +xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf) +{ + return; +} + + +/*ARGSUSED*/ +STATIC xfs_lsn_t +xfs_qm_qoffend_logitem_committed( + xfs_qoff_logitem_t *qfe, + xfs_lsn_t lsn) +{ + xfs_qoff_logitem_t *qfs; + + qfs = qfe->qql_start_lip; + spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); + /* + * Delete the qoff-start logitem from the AIL. + * xfs_trans_delete_ail() drops the AIL lock. + */ + xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); + kmem_free(qfs); + kmem_free(qfe); + return (xfs_lsn_t)-1; +} + +/* + * XXX rcc - don't know quite what to do with this. I think we can + * just ignore it. The only time that isn't the case is if we allow + * the client to somehow see that quotas have been turned off in which + * we can't allow that to get back until the quotaoff hits the disk. + * So how would that happen? Also, do we need different routines for + * quotaoff start and quotaoff end? I suspect the answer is yes but + * to be sure, I need to look at the recovery code and see how quota off + * recovery is handled (do we roll forward or back or do something else). + * If we roll forwards or backwards, then we need two separate routines, + * one that does nothing and one that stamps in the lsn that matters + * (truly makes the quotaoff irrevocable). If we do something else, + * then maybe we don't need two. + */ +/* ARGSUSED */ +STATIC void +xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) +{ + return; +} + +/* ARGSUSED */ +STATIC void +xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) +{ + return; +} + +static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { + .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, + .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) + xfs_qm_qoff_logitem_format, + .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, + .iop_unpin = (void(*)(xfs_log_item_t* ,int)) + xfs_qm_qoff_logitem_unpin, + .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) + xfs_qm_qoff_logitem_unpin_remove, + .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, + .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, + .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) + xfs_qm_qoffend_logitem_committed, + .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, + .iop_pushbuf = NULL, + .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) + xfs_qm_qoffend_logitem_committing +}; + +/* + * This is the ops vector shared by all quotaoff-start log items. + */ +static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { + .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, + .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) + xfs_qm_qoff_logitem_format, + .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, + .iop_unpin = (void(*)(xfs_log_item_t*, int)) + xfs_qm_qoff_logitem_unpin, + .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) + xfs_qm_qoff_logitem_unpin_remove, + .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, + .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, + .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) + xfs_qm_qoff_logitem_committed, + .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, + .iop_pushbuf = NULL, + .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) + xfs_qm_qoff_logitem_committing +}; + +/* + * Allocate and initialize an quotaoff item of the correct quota type(s). + */ +xfs_qoff_logitem_t * +xfs_qm_qoff_logitem_init( + struct xfs_mount *mp, + xfs_qoff_logitem_t *start, + uint flags) +{ + xfs_qoff_logitem_t *qf; + + qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); + + qf->qql_item.li_type = XFS_LI_QUOTAOFF; + if (start) + qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops; + else + qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops; + qf->qql_item.li_mountp = mp; + qf->qql_format.qf_type = XFS_LI_QUOTAOFF; + qf->qql_format.qf_flags = flags; + qf->qql_start_lip = start; + return (qf); +} diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h new file mode 100644 index 0000000..5a63253 --- /dev/null +++ b/fs/xfs/quota/xfs_dquot_item.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DQUOT_ITEM_H__ +#define __XFS_DQUOT_ITEM_H__ + +struct xfs_dquot; +struct xfs_trans; +struct xfs_mount; +struct xfs_qoff_logitem; + +typedef struct xfs_dq_logitem { + xfs_log_item_t qli_item; /* common portion */ + struct xfs_dquot *qli_dquot; /* dquot ptr */ + xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ + unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */ +#ifdef DEBUG + uint64_t qli_push_owner; +#endif + xfs_dq_logformat_t qli_format; /* logged structure */ +} xfs_dq_logitem_t; + +typedef struct xfs_qoff_logitem { + xfs_log_item_t qql_item; /* common portion */ + struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ + xfs_qoff_logformat_t qql_format; /* logged structure */ +} xfs_qoff_logitem_t; + + +extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); +extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, + struct xfs_qoff_logitem *, uint); +extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, + struct xfs_qoff_logitem *, uint); +extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, + struct xfs_qoff_logitem *); + +#endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c new file mode 100644 index 0000000..df0ffef --- /dev/null +++ b/fs/xfs/quota/xfs_qm.c @@ -0,0 +1,2833 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_clnt.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_bmap.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_space.h" +#include "xfs_utils.h" +#include "xfs_qm.h" + +/* + * The global quota manager. There is only one of these for the entire + * system, _not_ one per file system. XQM keeps track of the overall + * quota functionality, including maintaining the freelist and hash + * tables of dquots. + */ +mutex_t xfs_Gqm_lock; +struct xfs_qm *xfs_Gqm; +uint ndquot; + +kmem_zone_t *qm_dqzone; +kmem_zone_t *qm_dqtrxzone; + +static cred_t xfs_zerocr; + +STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); +STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); + +STATIC void xfs_qm_freelist_init(xfs_frlist_t *); +STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *); +STATIC int xfs_qm_mplist_nowait(xfs_mount_t *); +STATIC int xfs_qm_dqhashlock_nowait(xfs_dquot_t *); + +STATIC int xfs_qm_init_quotainos(xfs_mount_t *); +STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); +STATIC int xfs_qm_shake(int, gfp_t); + +static struct shrinker xfs_qm_shaker = { + .shrink = xfs_qm_shake, + .seeks = DEFAULT_SEEKS, +}; + +#ifdef DEBUG +extern mutex_t qcheck_lock; +#endif + +#ifdef QUOTADEBUG +#define XQM_LIST_PRINT(l, NXT, title) \ +{ \ + xfs_dquot_t *dqp; int i = 0; \ + cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ + for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \ + cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " \ + "bcnt = %d, icnt = %d, refs = %d", \ + ++i, (int) be32_to_cpu(dqp->q_core.d_id), \ + DQFLAGTO_TYPESTR(dqp), \ + (int) be64_to_cpu(dqp->q_core.d_bcount), \ + (int) be64_to_cpu(dqp->q_core.d_icount), \ + (int) dqp->q_nrefs); } \ +} +#else +#define XQM_LIST_PRINT(l, NXT, title) do { } while (0) +#endif + +/* + * Initialize the XQM structure. + * Note that there is not one quota manager per file system. + */ +STATIC struct xfs_qm * +xfs_Gqm_init(void) +{ + xfs_dqhash_t *udqhash, *gdqhash; + xfs_qm_t *xqm; + size_t hsize; + uint i; + + /* + * Initialize the dquot hash tables. + */ + udqhash = kmem_zalloc_greedy(&hsize, + XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), + KM_SLEEP | KM_MAYFAIL | KM_LARGE); + gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); + hsize /= sizeof(xfs_dqhash_t); + ndquot = hsize << 8; + + xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); + xqm->qm_dqhashmask = hsize - 1; + xqm->qm_usr_dqhtable = udqhash; + xqm->qm_grp_dqhtable = gdqhash; + ASSERT(xqm->qm_usr_dqhtable != NULL); + ASSERT(xqm->qm_grp_dqhtable != NULL); + + for (i = 0; i < hsize; i++) { + xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); + xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); + } + + /* + * Freelist of all dquots of all file systems + */ + xfs_qm_freelist_init(&(xqm->qm_dqfreelist)); + + /* + * dquot zone. we register our own low-memory callback. + */ + if (!qm_dqzone) { + xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t), + "xfs_dquots"); + qm_dqzone = xqm->qm_dqzone; + } else + xqm->qm_dqzone = qm_dqzone; + + register_shrinker(&xfs_qm_shaker); + + /* + * The t_dqinfo portion of transactions. + */ + if (!qm_dqtrxzone) { + xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t), + "xfs_dqtrx"); + qm_dqtrxzone = xqm->qm_dqtrxzone; + } else + xqm->qm_dqtrxzone = qm_dqtrxzone; + + atomic_set(&xqm->qm_totaldquots, 0); + xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; + xqm->qm_nrefs = 0; +#ifdef DEBUG + mutex_init(&qcheck_lock); +#endif + return xqm; +} + +/* + * Destroy the global quota manager when its reference count goes to zero. + */ +STATIC void +xfs_qm_destroy( + struct xfs_qm *xqm) +{ + int hsize, i; + + ASSERT(xqm != NULL); + ASSERT(xqm->qm_nrefs == 0); + unregister_shrinker(&xfs_qm_shaker); + hsize = xqm->qm_dqhashmask + 1; + for (i = 0; i < hsize; i++) { + xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); + xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); + } + kmem_free(xqm->qm_usr_dqhtable); + kmem_free(xqm->qm_grp_dqhtable); + xqm->qm_usr_dqhtable = NULL; + xqm->qm_grp_dqhtable = NULL; + xqm->qm_dqhashmask = 0; + xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist)); +#ifdef DEBUG + mutex_destroy(&qcheck_lock); +#endif + kmem_free(xqm); +} + +/* + * Called at mount time to let XQM know that another file system is + * starting quotas. This isn't crucial information as the individual mount + * structures are pretty independent, but it helps the XQM keep a + * global view of what's going on. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_hold_quotafs_ref( + struct xfs_mount *mp) +{ + /* + * Need to lock the xfs_Gqm structure for things like this. For example, + * the structure could disappear between the entry to this routine and + * a HOLD operation if not locked. + */ + XFS_QM_LOCK(xfs_Gqm); + + if (xfs_Gqm == NULL) + xfs_Gqm = xfs_Gqm_init(); + /* + * We can keep a list of all filesystems with quotas mounted for + * debugging and statistical purposes, but ... + * Just take a reference and get out. + */ + XFS_QM_HOLD(xfs_Gqm); + XFS_QM_UNLOCK(xfs_Gqm); + + return 0; +} + + +/* + * Release the reference that a filesystem took at mount time, + * so that we know when we need to destroy the entire quota manager. + */ +/* ARGSUSED */ +STATIC void +xfs_qm_rele_quotafs_ref( + struct xfs_mount *mp) +{ + xfs_dquot_t *dqp, *nextdqp; + + ASSERT(xfs_Gqm); + ASSERT(xfs_Gqm->qm_nrefs > 0); + + /* + * Go thru the freelist and destroy all inactive dquots. + */ + xfs_qm_freelist_lock(xfs_Gqm); + + for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; + dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) { + xfs_dqlock(dqp); + nextdqp = dqp->dq_flnext; + if (dqp->dq_flags & XFS_DQ_INACTIVE) { + ASSERT(dqp->q_mount == NULL); + ASSERT(! XFS_DQ_IS_DIRTY(dqp)); + ASSERT(dqp->HL_PREVP == NULL); + ASSERT(dqp->MPL_PREVP == NULL); + XQM_FREELIST_REMOVE(dqp); + xfs_dqunlock(dqp); + xfs_qm_dqdestroy(dqp); + } else { + xfs_dqunlock(dqp); + } + dqp = nextdqp; + } + xfs_qm_freelist_unlock(xfs_Gqm); + + /* + * Destroy the entire XQM. If somebody mounts with quotaon, this'll + * be restarted. + */ + XFS_QM_LOCK(xfs_Gqm); + XFS_QM_RELE(xfs_Gqm); + if (xfs_Gqm->qm_nrefs == 0) { + xfs_qm_destroy(xfs_Gqm); + xfs_Gqm = NULL; + } + XFS_QM_UNLOCK(xfs_Gqm); +} + +/* + * Just destroy the quotainfo structure. + */ +void +xfs_qm_unmount_quotadestroy( + xfs_mount_t *mp) +{ + if (mp->m_quotainfo) + xfs_qm_destroy_quotainfo(mp); +} + + +/* + * This is called from xfs_mountfs to start quotas and initialize all + * necessary data structures like quotainfo. This is also responsible for + * running a quotacheck as necessary. We are guaranteed that the superblock + * is consistently read in at this point. + * + * If we fail here, the mount will continue with quota turned off. We don't + * need to inidicate success or failure at all. + */ +void +xfs_qm_mount_quotas( + xfs_mount_t *mp) +{ + int error = 0; + uint sbf; + + /* + * If quotas on realtime volumes is not supported, we disable + * quotas immediately. + */ + if (mp->m_sb.sb_rextents) { + cmn_err(CE_NOTE, + "Cannot turn on quotas for realtime filesystem %s", + mp->m_fsname); + mp->m_qflags = 0; + goto write_changes; + } + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Allocate the quotainfo structure inside the mount struct, and + * create quotainode(s), and change/rev superblock if necessary. + */ + error = xfs_qm_init_quotainfo(mp); + if (error) { + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo == NULL); + mp->m_qflags = 0; + goto write_changes; + } + /* + * If any of the quotas are not consistent, do a quotacheck. + */ + if (XFS_QM_NEED_QUOTACHECK(mp)) { + error = xfs_qm_quotacheck(mp); + if (error) { + /* Quotacheck failed and disabled quotas. */ + return; + } + } + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + if (!XFS_IS_UQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_UQUOTA_CHKD; + if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) + mp->m_qflags &= ~XFS_OQUOTA_CHKD; + + write_changes: + /* + * We actually don't have to acquire the m_sb_lock at all. + * This can only be called from mount, and that's single threaded. XXX + */ + spin_lock(&mp->m_sb_lock); + sbf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { + if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { + /* + * We could only have been turning quotas off. + * We aren't in very good shape actually because + * the incore structures are convinced that quotas are + * off, but the on disk superblock doesn't know that ! + */ + ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); + xfs_fs_cmn_err(CE_ALERT, mp, + "XFS mount_quotas: Superblock update failed!"); + } + } + + if (error) { + xfs_fs_cmn_err(CE_WARN, mp, + "Failed to initialize disk quotas."); + } + return; +} + +/* + * Called from the vfsops layer. + */ +int +xfs_qm_unmount_quotas( + xfs_mount_t *mp) +{ + xfs_inode_t *uqp, *gqp; + int error = 0; + + /* + * Release the dquots that root inode, et al might be holding, + * before we flush quotas and blow away the quotainfo structure. + */ + ASSERT(mp->m_rootip); + xfs_qm_dqdetach(mp->m_rootip); + if (mp->m_rbmip) + xfs_qm_dqdetach(mp->m_rbmip); + if (mp->m_rsumip) + xfs_qm_dqdetach(mp->m_rsumip); + + /* + * Flush out the quota inodes. + */ + uqp = gqp = NULL; + if (mp->m_quotainfo) { + if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) { + xfs_ilock(uqp, XFS_ILOCK_EXCL); + xfs_iflock(uqp); + error = xfs_iflush(uqp, XFS_IFLUSH_SYNC); + xfs_iunlock(uqp, XFS_ILOCK_EXCL); + if (unlikely(error == EFSCORRUPTED)) { + XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)", + XFS_ERRLEVEL_LOW, mp); + goto out; + } + } + if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) { + xfs_ilock(gqp, XFS_ILOCK_EXCL); + xfs_iflock(gqp); + error = xfs_iflush(gqp, XFS_IFLUSH_SYNC); + xfs_iunlock(gqp, XFS_ILOCK_EXCL); + if (unlikely(error == EFSCORRUPTED)) { + XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)", + XFS_ERRLEVEL_LOW, mp); + goto out; + } + } + } + if (uqp) { + IRELE(uqp); + mp->m_quotainfo->qi_uquotaip = NULL; + } + if (gqp) { + IRELE(gqp); + mp->m_quotainfo->qi_gquotaip = NULL; + } +out: + return XFS_ERROR(error); +} + +/* + * Flush all dquots of the given file system to disk. The dquots are + * _not_ purged from memory here, just their data written to disk. + */ +STATIC int +xfs_qm_dqflush_all( + xfs_mount_t *mp, + int flags) +{ + int recl; + xfs_dquot_t *dqp; + int niters; + int error; + + if (mp->m_quotainfo == NULL) + return 0; + niters = 0; +again: + xfs_qm_mplist_lock(mp); + FOREACH_DQUOT_IN_MP(dqp, mp) { + xfs_dqlock(dqp); + if (! XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqunlock(dqp); + continue; + } + xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); + /* XXX a sentinel would be better */ + recl = XFS_QI_MPLRECLAIMS(mp); + if (!xfs_dqflock_nowait(dqp)) { + /* + * If we can't grab the flush lock then check + * to see if the dquot has been flushed delayed + * write. If so, grab its buffer and send it + * out immediately. We'll be able to acquire + * the flush lock when the I/O completes. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + /* + * Let go of the mplist lock. We don't want to hold it + * across a disk write. + */ + xfs_qm_mplist_unlock(mp); + error = xfs_qm_dqflush(dqp, flags); + xfs_dqunlock(dqp); + if (error) + return error; + + xfs_qm_mplist_lock(mp); + if (recl != XFS_QI_MPLRECLAIMS(mp)) { + xfs_qm_mplist_unlock(mp); + /* XXX restart limit */ + goto again; + } + } + + xfs_qm_mplist_unlock(mp); + /* return ! busy */ + return 0; +} +/* + * Release the group dquot pointers the user dquots may be + * carrying around as a hint. mplist is locked on entry and exit. + */ +STATIC void +xfs_qm_detach_gdquots( + xfs_mount_t *mp) +{ + xfs_dquot_t *dqp, *gdqp; + int nrecl; + + again: + ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); + dqp = XFS_QI_MPLNEXT(mp); + while (dqp) { + xfs_dqlock(dqp); + if ((gdqp = dqp->q_gdquot)) { + xfs_dqlock(gdqp); + dqp->q_gdquot = NULL; + } + xfs_dqunlock(dqp); + + if (gdqp) { + /* + * Can't hold the mplist lock across a dqput. + * XXXmust convert to marker based iterations here. + */ + nrecl = XFS_QI_MPLRECLAIMS(mp); + xfs_qm_mplist_unlock(mp); + xfs_qm_dqput(gdqp); + + xfs_qm_mplist_lock(mp); + if (nrecl != XFS_QI_MPLRECLAIMS(mp)) + goto again; + } + dqp = dqp->MPL_NEXT; + } +} + +/* + * Go through all the incore dquots of this file system and take them + * off the mplist and hashlist, if the dquot type matches the dqtype + * parameter. This is used when turning off quota accounting for + * users and/or groups, as well as when the filesystem is unmounting. + */ +STATIC int +xfs_qm_dqpurge_int( + xfs_mount_t *mp, + uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */ +{ + xfs_dquot_t *dqp; + uint dqtype; + int nrecl; + xfs_dquot_t *nextdqp; + int nmisses; + + if (mp->m_quotainfo == NULL) + return 0; + + dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; + dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; + dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; + + xfs_qm_mplist_lock(mp); + + /* + * In the first pass through all incore dquots of this filesystem, + * we release the group dquot pointers the user dquots may be + * carrying around as a hint. We need to do this irrespective of + * what's being turned off. + */ + xfs_qm_detach_gdquots(mp); + + again: + nmisses = 0; + ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); + /* + * Try to get rid of all of the unwanted dquots. The idea is to + * get them off mplist and hashlist, but leave them on freelist. + */ + dqp = XFS_QI_MPLNEXT(mp); + while (dqp) { + /* + * It's OK to look at the type without taking dqlock here. + * We're holding the mplist lock here, and that's needed for + * a dqreclaim. + */ + if ((dqp->dq_flags & dqtype) == 0) { + dqp = dqp->MPL_NEXT; + continue; + } + + if (! xfs_qm_dqhashlock_nowait(dqp)) { + nrecl = XFS_QI_MPLRECLAIMS(mp); + xfs_qm_mplist_unlock(mp); + XFS_DQ_HASH_LOCK(dqp->q_hash); + xfs_qm_mplist_lock(mp); + + /* + * XXXTheoretically, we can get into a very long + * ping pong game here. + * No one can be adding dquots to the mplist at + * this point, but somebody might be taking things off. + */ + if (nrecl != XFS_QI_MPLRECLAIMS(mp)) { + XFS_DQ_HASH_UNLOCK(dqp->q_hash); + goto again; + } + } + + /* + * Take the dquot off the mplist and hashlist. It may remain on + * freelist in INACTIVE state. + */ + nextdqp = dqp->MPL_NEXT; + nmisses += xfs_qm_dqpurge(dqp); + dqp = nextdqp; + } + xfs_qm_mplist_unlock(mp); + return nmisses; +} + +int +xfs_qm_dqpurge_all( + xfs_mount_t *mp, + uint flags) +{ + int ndquots; + + /* + * Purge the dquot cache. + * None of the dquots should really be busy at this point. + */ + if (mp->m_quotainfo) { + while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) { + delay(ndquots * 10); + } + } + return 0; +} + +STATIC int +xfs_qm_dqattach_one( + xfs_inode_t *ip, + xfs_dqid_t id, + uint type, + uint doalloc, + uint dolock, + xfs_dquot_t *udqhint, /* hint */ + xfs_dquot_t **IO_idqpp) +{ + xfs_dquot_t *dqp; + int error; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + error = 0; + /* + * See if we already have it in the inode itself. IO_idqpp is + * &i_udquot or &i_gdquot. This made the code look weird, but + * made the logic a lot simpler. + */ + if ((dqp = *IO_idqpp)) { + if (dolock) + xfs_dqlock(dqp); + xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); + goto done; + } + + /* + * udqhint is the i_udquot field in inode, and is non-NULL only + * when the type arg is group/project. Its purpose is to save a + * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside + * the user dquot. + */ + ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); + if (udqhint && !dolock) + xfs_dqlock(udqhint); + + /* + * No need to take dqlock to look at the id. + * The ID can't change until it gets reclaimed, and it won't + * be reclaimed as long as we have a ref from inode and we hold + * the ilock. + */ + if (udqhint && + (dqp = udqhint->q_gdquot) && + (be32_to_cpu(dqp->q_core.d_id) == id)) { + ASSERT(XFS_DQ_IS_LOCKED(udqhint)); + xfs_dqlock(dqp); + XFS_DQHOLD(dqp); + ASSERT(*IO_idqpp == NULL); + *IO_idqpp = dqp; + if (!dolock) { + xfs_dqunlock(dqp); + xfs_dqunlock(udqhint); + } + goto done; + } + /* + * We can't hold a dquot lock when we call the dqget code. + * We'll deadlock in no time, because of (not conforming to) + * lock ordering - the inodelock comes before any dquot lock, + * and we may drop and reacquire the ilock in xfs_qm_dqget(). + */ + if (udqhint) + xfs_dqunlock(udqhint); + /* + * Find the dquot from somewhere. This bumps the + * reference count of dquot and returns it locked. + * This can return ENOENT if dquot didn't exist on + * disk and we didn't ask it to allocate; + * ESRCH if quotas got turned off suddenly. + */ + if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type, + doalloc|XFS_QMOPT_DOWARN, &dqp))) { + if (udqhint && dolock) + xfs_dqlock(udqhint); + goto done; + } + + xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); + /* + * dqget may have dropped and re-acquired the ilock, but it guarantees + * that the dquot returned is the one that should go in the inode. + */ + *IO_idqpp = dqp; + ASSERT(dqp); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + if (! dolock) { + xfs_dqunlock(dqp); + goto done; + } + if (! udqhint) + goto done; + + ASSERT(udqhint); + ASSERT(dolock); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + if (! xfs_qm_dqlock_nowait(udqhint)) { + xfs_dqunlock(dqp); + xfs_dqlock(udqhint); + xfs_dqlock(dqp); + } + done: +#ifdef QUOTADEBUG + if (udqhint) { + if (dolock) + ASSERT(XFS_DQ_IS_LOCKED(udqhint)); + } + if (! error) { + if (dolock) + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + } +#endif + return error; +} + + +/* + * Given a udquot and gdquot, attach a ptr to the group dquot in the + * udquot as a hint for future lookups. The idea sounds simple, but the + * execution isn't, because the udquot might have a group dquot attached + * already and getting rid of that gets us into lock ordering constraints. + * The process is complicated more by the fact that the dquots may or may not + * be locked on entry. + */ +STATIC void +xfs_qm_dqattach_grouphint( + xfs_dquot_t *udq, + xfs_dquot_t *gdq, + uint locked) +{ + xfs_dquot_t *tmp; + +#ifdef QUOTADEBUG + if (locked) { + ASSERT(XFS_DQ_IS_LOCKED(udq)); + ASSERT(XFS_DQ_IS_LOCKED(gdq)); + } +#endif + if (! locked) + xfs_dqlock(udq); + + if ((tmp = udq->q_gdquot)) { + if (tmp == gdq) { + if (! locked) + xfs_dqunlock(udq); + return; + } + + udq->q_gdquot = NULL; + /* + * We can't keep any dqlocks when calling dqrele, + * because the freelist lock comes before dqlocks. + */ + xfs_dqunlock(udq); + if (locked) + xfs_dqunlock(gdq); + /* + * we took a hard reference once upon a time in dqget, + * so give it back when the udquot no longer points at it + * dqput() does the unlocking of the dquot. + */ + xfs_qm_dqrele(tmp); + + xfs_dqlock(udq); + xfs_dqlock(gdq); + + } else { + ASSERT(XFS_DQ_IS_LOCKED(udq)); + if (! locked) { + xfs_dqlock(gdq); + } + } + + ASSERT(XFS_DQ_IS_LOCKED(udq)); + ASSERT(XFS_DQ_IS_LOCKED(gdq)); + /* + * Somebody could have attached a gdquot here, + * when we dropped the uqlock. If so, just do nothing. + */ + if (udq->q_gdquot == NULL) { + XFS_DQHOLD(gdq); + udq->q_gdquot = gdq; + } + if (! locked) { + xfs_dqunlock(gdq); + xfs_dqunlock(udq); + } +} + + +/* + * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON + * into account. + * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. + * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty + * much made this code a complete mess, but it has been pretty useful. + * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. + * Inode may get unlocked and relocked in here, and the caller must deal with + * the consequences. + */ +int +xfs_qm_dqattach( + xfs_inode_t *ip, + uint flags) +{ + xfs_mount_t *mp = ip->i_mount; + uint nquotas = 0; + int error = 0; + + if ((! XFS_IS_QUOTA_ON(mp)) || + (! XFS_NOT_DQATTACHED(mp, ip)) || + (ip->i_ino == mp->m_sb.sb_uquotino) || + (ip->i_ino == mp->m_sb.sb_gquotino)) + return 0; + + ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || + xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (! (flags & XFS_QMOPT_ILOCKED)) + xfs_ilock(ip, XFS_ILOCK_EXCL); + + if (XFS_IS_UQUOTA_ON(mp)) { + error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, + flags & XFS_QMOPT_DQALLOC, + flags & XFS_QMOPT_DQLOCK, + NULL, &ip->i_udquot); + if (error) + goto done; + nquotas++; + } + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (XFS_IS_OQUOTA_ON(mp)) { + error = XFS_IS_GQUOTA_ON(mp) ? + xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, + flags & XFS_QMOPT_DQALLOC, + flags & XFS_QMOPT_DQLOCK, + ip->i_udquot, &ip->i_gdquot) : + xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, + flags & XFS_QMOPT_DQALLOC, + flags & XFS_QMOPT_DQLOCK, + ip->i_udquot, &ip->i_gdquot); + /* + * Don't worry about the udquot that we may have + * attached above. It'll get detached, if not already. + */ + if (error) + goto done; + nquotas++; + } + + /* + * Attach this group quota to the user quota as a hint. + * This WON'T, in general, result in a thrash. + */ + if (nquotas == 2) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_udquot); + ASSERT(ip->i_gdquot); + + /* + * We may or may not have the i_udquot locked at this point, + * but this check is OK since we don't depend on the i_gdquot to + * be accurate 100% all the time. It is just a hint, and this + * will succeed in general. + */ + if (ip->i_udquot->q_gdquot == ip->i_gdquot) + goto done; + /* + * Attach i_gdquot to the gdquot hint inside the i_udquot. + */ + xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot, + flags & XFS_QMOPT_DQLOCK); + } + + done: + +#ifdef QUOTADEBUG + if (! error) { + if (ip->i_udquot) { + if (flags & XFS_QMOPT_DQLOCK) + ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot)); + } + if (ip->i_gdquot) { + if (flags & XFS_QMOPT_DQLOCK) + ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot)); + } + if (XFS_IS_UQUOTA_ON(mp)) + ASSERT(ip->i_udquot); + if (XFS_IS_OQUOTA_ON(mp)) + ASSERT(ip->i_gdquot); + } +#endif + + if (! (flags & XFS_QMOPT_ILOCKED)) + xfs_iunlock(ip, XFS_ILOCK_EXCL); + +#ifdef QUOTADEBUG + else + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +#endif + return error; +} + +/* + * Release dquots (and their references) if any. + * The inode should be locked EXCL except when this's called by + * xfs_ireclaim. + */ +void +xfs_qm_dqdetach( + xfs_inode_t *ip) +{ + if (!(ip->i_udquot || ip->i_gdquot)) + return; + + ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); + ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); + if (ip->i_udquot) { + xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip); + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (ip->i_gdquot) { + xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip); + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } +} + +/* + * This is called by VFS_SYNC and flags arg determines the caller, + * and its motives, as done in xfs_sync. + * + * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31 + * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25 + * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA + */ + +int +xfs_qm_sync( + xfs_mount_t *mp, + int flags) +{ + int recl, restarts; + xfs_dquot_t *dqp; + uint flush_flags; + boolean_t nowait; + int error; + + if (! XFS_IS_QUOTA_ON(mp)) + return 0; + + restarts = 0; + /* + * We won't block unless we are asked to. + */ + nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0); + + again: + xfs_qm_mplist_lock(mp); + /* + * dqpurge_all() also takes the mplist lock and iterate thru all dquots + * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared + * when we have the mplist lock, we know that dquots will be consistent + * as long as we have it locked. + */ + if (! XFS_IS_QUOTA_ON(mp)) { + xfs_qm_mplist_unlock(mp); + return 0; + } + FOREACH_DQUOT_IN_MP(dqp, mp) { + /* + * If this is vfs_sync calling, then skip the dquots that + * don't 'seem' to be dirty. ie. don't acquire dqlock. + * This is very similar to what xfs_sync does with inodes. + */ + if (flags & SYNC_BDFLUSH) { + if (! XFS_DQ_IS_DIRTY(dqp)) + continue; + } + + if (nowait) { + /* + * Try to acquire the dquot lock. We are NOT out of + * lock order, but we just don't want to wait for this + * lock, unless somebody wanted us to. + */ + if (! xfs_qm_dqlock_nowait(dqp)) + continue; + } else { + xfs_dqlock(dqp); + } + + /* + * Now, find out for sure if this dquot is dirty or not. + */ + if (! XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqunlock(dqp); + continue; + } + + /* XXX a sentinel would be better */ + recl = XFS_QI_MPLRECLAIMS(mp); + if (!xfs_dqflock_nowait(dqp)) { + if (nowait) { + xfs_dqunlock(dqp); + continue; + } + /* + * If we can't grab the flush lock then if the caller + * really wanted us to give this our best shot, so + * see if we can give a push to the buffer before we wait + * on the flush lock. At this point, we know that + * even though the dquot is being flushed, + * it has (new) dirty data. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + /* + * Let go of the mplist lock. We don't want to hold it + * across a disk write + */ + flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC; + xfs_qm_mplist_unlock(mp); + xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); + error = xfs_qm_dqflush(dqp, flush_flags); + xfs_dqunlock(dqp); + if (error && XFS_FORCED_SHUTDOWN(mp)) + return 0; /* Need to prevent umount failure */ + else if (error) + return error; + + xfs_qm_mplist_lock(mp); + if (recl != XFS_QI_MPLRECLAIMS(mp)) { + if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) + break; + + xfs_qm_mplist_unlock(mp); + goto again; + } + } + + xfs_qm_mplist_unlock(mp); + return 0; +} + + +/* + * This initializes all the quota information that's kept in the + * mount structure + */ +STATIC int +xfs_qm_init_quotainfo( + xfs_mount_t *mp) +{ + xfs_quotainfo_t *qinf; + int error; + xfs_dquot_t *dqp; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Tell XQM that we exist as soon as possible. + */ + if ((error = xfs_qm_hold_quotafs_ref(mp))) { + return error; + } + + qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); + + /* + * See if quotainodes are setup, and if not, allocate them, + * and change the superblock accordingly. + */ + if ((error = xfs_qm_init_quotainos(mp))) { + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; + } + + spin_lock_init(&qinf->qi_pinlock); + xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); + qinf->qi_dqreclaims = 0; + + /* mutex used to serialize quotaoffs */ + mutex_init(&qinf->qi_quotaofflock); + + /* Precalc some constants */ + qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); + ASSERT(qinf->qi_dqchunklen); + qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); + do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t)); + + mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); + + /* + * We try to get the limits from the superuser's limits fields. + * This is quite hacky, but it is standard quota practice. + * We look at the USR dquot with id == 0 first, but if user quotas + * are not enabled we goto the GRP dquot with id == 0. + * We don't really care to keep separate default limits for user + * and group quotas, at least not at this point. + */ + error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, + XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : + (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : + XFS_DQ_PROJ), + XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, + &dqp); + if (! error) { + xfs_disk_dquot_t *ddqp = &dqp->q_core; + + /* + * The warnings and timers set the grace period given to + * a user or group before he or she can not perform any + * more writing. If it is zero, a default is used. + */ + qinf->qi_btimelimit = ddqp->d_btimer ? + be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; + qinf->qi_itimelimit = ddqp->d_itimer ? + be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; + qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? + be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; + qinf->qi_bwarnlimit = ddqp->d_bwarns ? + be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; + qinf->qi_iwarnlimit = ddqp->d_iwarns ? + be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? + be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; + qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); + qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); + qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); + qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); + qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); + qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); + + /* + * We sent the XFS_QMOPT_DQSUSER flag to dqget because + * we don't want this dquot cached. We haven't done a + * quotacheck yet, and quotacheck doesn't like incore dquots. + */ + xfs_qm_dqdestroy(dqp); + } else { + qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; + qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; + qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; + qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; + qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; + } + + return 0; +} + + +/* + * Gets called when unmounting a filesystem or when all quotas get + * turned off. + * This purges the quota inodes, destroys locks and frees itself. + */ +void +xfs_qm_destroy_quotainfo( + xfs_mount_t *mp) +{ + xfs_quotainfo_t *qi; + + qi = mp->m_quotainfo; + ASSERT(qi != NULL); + ASSERT(xfs_Gqm != NULL); + + /* + * Release the reference that XQM kept, so that we know + * when the XQM structure should be freed. We cannot assume + * that xfs_Gqm is non-null after this point. + */ + xfs_qm_rele_quotafs_ref(mp); + + spinlock_destroy(&qi->qi_pinlock); + xfs_qm_list_destroy(&qi->qi_dqlist); + + if (qi->qi_uquotaip) { + IRELE(qi->qi_uquotaip); + qi->qi_uquotaip = NULL; /* paranoia */ + } + if (qi->qi_gquotaip) { + IRELE(qi->qi_gquotaip); + qi->qi_gquotaip = NULL; + } + mutex_destroy(&qi->qi_quotaofflock); + kmem_free(qi); + mp->m_quotainfo = NULL; +} + + + +/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */ + +/* ARGSUSED */ +STATIC void +xfs_qm_list_init( + xfs_dqlist_t *list, + char *str, + int n) +{ + mutex_init(&list->qh_lock); + list->qh_next = NULL; + list->qh_version = 0; + list->qh_nelems = 0; +} + +STATIC void +xfs_qm_list_destroy( + xfs_dqlist_t *list) +{ + mutex_destroy(&(list->qh_lock)); +} + + +/* + * Stripped down version of dqattach. This doesn't attach, or even look at the + * dquots attached to the inode. The rationale is that there won't be any + * attached at the time this is called from quotacheck. + */ +STATIC int +xfs_qm_dqget_noattach( + xfs_inode_t *ip, + xfs_dquot_t **O_udqpp, + xfs_dquot_t **O_gdqpp) +{ + int error; + xfs_mount_t *mp; + xfs_dquot_t *udqp, *gdqp; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + mp = ip->i_mount; + udqp = NULL; + gdqp = NULL; + + if (XFS_IS_UQUOTA_ON(mp)) { + ASSERT(ip->i_udquot == NULL); + /* + * We want the dquot allocated if it doesn't exist. + */ + if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER, + XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, + &udqp))) { + /* + * Shouldn't be able to turn off quotas here. + */ + ASSERT(error != ESRCH); + ASSERT(error != ENOENT); + return error; + } + ASSERT(udqp); + } + + if (XFS_IS_OQUOTA_ON(mp)) { + ASSERT(ip->i_gdquot == NULL); + if (udqp) + xfs_dqunlock(udqp); + error = XFS_IS_GQUOTA_ON(mp) ? + xfs_qm_dqget(mp, ip, + ip->i_d.di_gid, XFS_DQ_GROUP, + XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, + &gdqp) : + xfs_qm_dqget(mp, ip, + ip->i_d.di_projid, XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, + &gdqp); + if (error) { + if (udqp) + xfs_qm_dqrele(udqp); + ASSERT(error != ESRCH); + ASSERT(error != ENOENT); + return error; + } + ASSERT(gdqp); + + /* Reacquire the locks in the right order */ + if (udqp) { + if (! xfs_qm_dqlock_nowait(udqp)) { + xfs_dqunlock(gdqp); + xfs_dqlock(udqp); + xfs_dqlock(gdqp); + } + } + } + + *O_udqpp = udqp; + *O_gdqpp = gdqp; + +#ifdef QUOTADEBUG + if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp)); + if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp)); +#endif + return 0; +} + +/* + * Create an inode and return with a reference already taken, but unlocked + * This is how we create quota inodes + */ +STATIC int +xfs_qm_qino_alloc( + xfs_mount_t *mp, + xfs_inode_t **ip, + __int64_t sbfields, + uint flags) +{ + xfs_trans_t *tp; + int error; + int committed; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); + if ((error = xfs_trans_reserve(tp, + XFS_QM_QINOCREATE_SPACE_RES(mp), + XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_CREATE_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return error; + } + + if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, + &xfs_zerocr, 0, 1, ip, &committed))) { + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | + XFS_TRANS_ABORT); + return error; + } + + /* + * Keep an extra reference to this quota inode. This inode is + * locked exclusively and joined to the transaction already. + */ + ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); + IHOLD(*ip); + + /* + * Make the changes in the superblock, and log those too. + * sbfields arg may contain fields other than *QUOTINO; + * VERSIONNUM for example. + */ + spin_lock(&mp->m_sb_lock); + if (flags & XFS_QMOPT_SBVERSION) { +#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) + unsigned oldv = mp->m_sb.sb_versionnum; +#endif + ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); + ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == + (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); + + xfs_sb_version_addquota(&mp->m_sb); + mp->m_sb.sb_uquotino = NULLFSINO; + mp->m_sb.sb_gquotino = NULLFSINO; + + /* qflags will get updated _after_ quotacheck */ + mp->m_sb.sb_qflags = 0; +#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) + cmn_err(CE_NOTE, + "Old superblock version %x, converting to %x.", + oldv, mp->m_sb.sb_versionnum); +#endif + } + if (flags & XFS_QMOPT_UQUOTA) + mp->m_sb.sb_uquotino = (*ip)->i_ino; + else + mp->m_sb.sb_gquotino = (*ip)->i_ino; + spin_unlock(&mp->m_sb_lock); + xfs_mod_sb(tp, sbfields); + + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { + xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); + return error; + } + return 0; +} + + +STATIC void +xfs_qm_reset_dqcounts( + xfs_mount_t *mp, + xfs_buf_t *bp, + xfs_dqid_t id, + uint type) +{ + xfs_disk_dquot_t *ddq; + int j; + + xfs_buftrace("RESET DQUOTS", bp); + /* + * Reset all counters and timers. They'll be + * started afresh by xfs_qm_quotacheck. + */ +#ifdef DEBUG + j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); + do_div(j, sizeof(xfs_dqblk_t)); + ASSERT(XFS_QM_DQPERBLK(mp) == j); +#endif + ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp); + for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) { + /* + * Do a sanity check, and if needed, repair the dqblk. Don't + * output any warnings because it's perfectly possible to + * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. + */ + (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, + "xfs_quotacheck"); + ddq->d_bcount = 0; + ddq->d_icount = 0; + ddq->d_rtbcount = 0; + ddq->d_btimer = 0; + ddq->d_itimer = 0; + ddq->d_rtbtimer = 0; + ddq->d_bwarns = 0; + ddq->d_iwarns = 0; + ddq->d_rtbwarns = 0; + ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); + } +} + +STATIC int +xfs_qm_dqiter_bufs( + xfs_mount_t *mp, + xfs_dqid_t firstid, + xfs_fsblock_t bno, + xfs_filblks_t blkcnt, + uint flags) +{ + xfs_buf_t *bp; + int error; + int notcommitted; + int incr; + int type; + + ASSERT(blkcnt > 0); + notcommitted = 0; + incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? + XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; + type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : + (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); + error = 0; + + /* + * Blkcnt arg can be a very big number, and might even be + * larger than the log itself. So, we have to break it up into + * manageable-sized transactions. + * Note that we don't start a permanent transaction here; we might + * not be able to get a log reservation for the whole thing up front, + * and we don't really care to either, because we just discard + * everything if we were to crash in the middle of this loop. + */ + while (blkcnt--) { + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, bno), + (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp); + if (error) + break; + + xfs_qm_reset_dqcounts(mp, bp, firstid, type); + xfs_bdwrite(mp, bp); + /* + * goto the next block. + */ + bno++; + firstid += XFS_QM_DQPERBLK(mp); + } + return error; +} + +/* + * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a + * caller supplied function for every chunk of dquots that we find. + */ +STATIC int +xfs_qm_dqiterate( + xfs_mount_t *mp, + xfs_inode_t *qip, + uint flags) +{ + xfs_bmbt_irec_t *map; + int i, nmaps; /* number of map entries */ + int error; /* return value */ + xfs_fileoff_t lblkno; + xfs_filblks_t maxlblkcnt; + xfs_dqid_t firstid; + xfs_fsblock_t rablkno; + xfs_filblks_t rablkcnt; + + error = 0; + /* + * This looks racy, but we can't keep an inode lock across a + * trans_reserve. But, this gets called during quotacheck, and that + * happens only at mount time which is single threaded. + */ + if (qip->i_d.di_nblocks == 0) + return 0; + + map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); + + lblkno = 0; + maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); + do { + nmaps = XFS_DQITER_MAP_SIZE; + /* + * We aren't changing the inode itself. Just changing + * some of its data. No new blocks are added here, and + * the inode is never added to the transaction. + */ + xfs_ilock(qip, XFS_ILOCK_SHARED); + error = xfs_bmapi(NULL, qip, lblkno, + maxlblkcnt - lblkno, + XFS_BMAPI_METADATA, + NULL, + 0, map, &nmaps, NULL, NULL); + xfs_iunlock(qip, XFS_ILOCK_SHARED); + if (error) + break; + + ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); + for (i = 0; i < nmaps; i++) { + ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); + ASSERT(map[i].br_blockcount); + + + lblkno += map[i].br_blockcount; + + if (map[i].br_startblock == HOLESTARTBLOCK) + continue; + + firstid = (xfs_dqid_t) map[i].br_startoff * + XFS_QM_DQPERBLK(mp); + /* + * Do a read-ahead on the next extent. + */ + if ((i+1 < nmaps) && + (map[i+1].br_startblock != HOLESTARTBLOCK)) { + rablkcnt = map[i+1].br_blockcount; + rablkno = map[i+1].br_startblock; + while (rablkcnt--) { + xfs_baread(mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, rablkno), + (int)XFS_QI_DQCHUNKLEN(mp)); + rablkno++; + } + } + /* + * Iterate thru all the blks in the extent and + * reset the counters of all the dquots inside them. + */ + if ((error = xfs_qm_dqiter_bufs(mp, + firstid, + map[i].br_startblock, + map[i].br_blockcount, + flags))) { + break; + } + } + + if (error) + break; + } while (nmaps > 0); + + kmem_free(map); + + return error; +} + +/* + * Called by dqusage_adjust in doing a quotacheck. + * Given the inode, and a dquot (either USR or GRP, doesn't matter), + * this updates its incore copy as well as the buffer copy. This is + * so that once the quotacheck is done, we can just log all the buffers, + * as opposed to logging numerous updates to individual dquots. + */ +STATIC void +xfs_qm_quotacheck_dqadjust( + xfs_dquot_t *dqp, + xfs_qcnt_t nblks, + xfs_qcnt_t rtblks) +{ + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + xfs_dqtrace_entry(dqp, "QCHECK DQADJUST"); + /* + * Adjust the inode count and the block count to reflect this inode's + * resource usage. + */ + be64_add_cpu(&dqp->q_core.d_icount, 1); + dqp->q_res_icount++; + if (nblks) { + be64_add_cpu(&dqp->q_core.d_bcount, nblks); + dqp->q_res_bcount += nblks; + } + if (rtblks) { + be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); + dqp->q_res_rtbcount += rtblks; + } + + /* + * Set default limits, adjust timers (since we changed usages) + */ + if (! XFS_IS_SUSER_DQUOT(dqp)) { + xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); + xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); + } + + dqp->dq_flags |= XFS_DQ_DIRTY; +} + +STATIC int +xfs_qm_get_rtblks( + xfs_inode_t *ip, + xfs_qcnt_t *O_rtblks) +{ + xfs_filblks_t rtblks; /* total rt blks */ + xfs_extnum_t idx; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t nextents; /* number of extent entries */ + int error; + + ASSERT(XFS_IS_REALTIME_INODE(ip)); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) + return error; + } + rtblks = 0; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (idx = 0; idx < nextents; idx++) + rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); + *O_rtblks = (xfs_qcnt_t)rtblks; + return 0; +} + +/* + * callback routine supplied to bulkstat(). Given an inumber, find its + * dquots and update them to account for resources taken by that inode. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_dqusage_adjust( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* not used */ + int ubsize, /* not used */ + void *private_data, /* not used */ + xfs_daddr_t bno, /* starting block of inode cluster */ + int *ubused, /* not used */ + void *dip, /* on-disk inode pointer (not used) */ + int *res) /* result code value */ +{ + xfs_inode_t *ip; + xfs_dquot_t *udqp, *gdqp; + xfs_qcnt_t nblks, rtblks; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * rootino must have its resources accounted for, not so with the quota + * inodes. + */ + if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { + *res = BULKSTAT_RV_NOTHING; + return XFS_ERROR(EINVAL); + } + + /* + * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget + * interface expects the inode to be exclusively locked because that's + * the case in all other instances. It's OK that we do this because + * quotacheck is done only at mount time. + */ + if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { + *res = BULKSTAT_RV_NOTHING; + return error; + } + + /* + * Obtain the locked dquots. In case of an error (eg. allocation + * fails for ENOSPC), we return the negative of the error number + * to bulkstat, so that it can get propagated to quotacheck() and + * making us disable quotas for the file system. + */ + if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { + xfs_iput(ip, XFS_ILOCK_EXCL); + *res = BULKSTAT_RV_GIVEUP; + return error; + } + + rtblks = 0; + if (! XFS_IS_REALTIME_INODE(ip)) { + nblks = (xfs_qcnt_t)ip->i_d.di_nblocks; + } else { + /* + * Walk thru the extent list and count the realtime blocks. + */ + if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { + xfs_iput(ip, XFS_ILOCK_EXCL); + if (udqp) + xfs_qm_dqput(udqp); + if (gdqp) + xfs_qm_dqput(gdqp); + *res = BULKSTAT_RV_GIVEUP; + return error; + } + nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; + } + ASSERT(ip->i_delayed_blks == 0); + + /* + * We can't release the inode while holding its dquot locks. + * The inode can go into inactive and might try to acquire the dquotlocks. + * So, just unlock here and do a vn_rele at the end. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + /* + * Add the (disk blocks and inode) resources occupied by this + * inode to its dquots. We do this adjustment in the incore dquot, + * and also copy the changes to its buffer. + * We don't care about putting these changes in a transaction + * envelope because if we crash in the middle of a 'quotacheck' + * we have to start from the beginning anyway. + * Once we're done, we'll log all the dquot bufs. + * + * The *QUOTA_ON checks below may look pretty racy, but quotachecks + * and quotaoffs don't race. (Quotachecks happen at mount time only). + */ + if (XFS_IS_UQUOTA_ON(mp)) { + ASSERT(udqp); + xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); + xfs_qm_dqput(udqp); + } + if (XFS_IS_OQUOTA_ON(mp)) { + ASSERT(gdqp); + xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); + xfs_qm_dqput(gdqp); + } + /* + * Now release the inode. This will send it to 'inactive', and + * possibly even free blocks. + */ + IRELE(ip); + + /* + * Goto next inode. + */ + *res = BULKSTAT_RV_DIDONE; + return 0; +} + +/* + * Walk thru all the filesystem inodes and construct a consistent view + * of the disk quota world. If the quotacheck fails, disable quotas. + */ +int +xfs_qm_quotacheck( + xfs_mount_t *mp) +{ + int done, count, error; + xfs_ino_t lastino; + size_t structsz; + xfs_inode_t *uip, *gip; + uint flags; + + count = INT_MAX; + structsz = 1; + lastino = 0; + flags = 0; + + ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp)); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * There should be no cached dquots. The (simplistic) quotacheck + * algorithm doesn't like that. + */ + ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0); + + cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); + + /* + * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset + * their counters to zero. We need a clean slate. + * We don't log our changes till later. + */ + if ((uip = XFS_QI_UQIP(mp))) { + if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA))) + goto error_return; + flags |= XFS_UQUOTA_CHKD; + } + + if ((gip = XFS_QI_GQIP(mp))) { + if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA))) + goto error_return; + flags |= XFS_OQUOTA_CHKD; + } + + do { + /* + * Iterate thru all the inodes in the file system, + * adjusting the corresponding dquot counters in core. + */ + if ((error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_dqusage_adjust, NULL, + structsz, NULL, BULKSTAT_FG_IGET, &done))) + break; + + } while (! done); + + /* + * We've made all the changes that we need to make incore. + * Flush them down to disk buffers if everything was updated + * successfully. + */ + if (!error) + error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); + + /* + * We can get this error if we couldn't do a dquot allocation inside + * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the + * dirty dquots that might be cached, we just want to get rid of them + * and turn quotaoff. The dquots won't be attached to any of the inodes + * at this point (because we intentionally didn't in dqget_noattach). + */ + if (error) { + xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); + goto error_return; + } + + /* + * We didn't log anything, because if we crashed, we'll have to + * start the quotacheck from scratch anyway. However, we must make + * sure that our dquot changes are secure before we put the + * quotacheck'd stamp on the superblock. So, here we do a synchronous + * flush. + */ + XFS_bflush(mp->m_ddev_targp); + + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); + mp->m_qflags |= flags; + + XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++"); + + error_return: + if (error) { + cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): " + "Disabling quotas.", + mp->m_fsname, error); + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo != NULL); + ASSERT(xfs_Gqm != NULL); + xfs_qm_destroy_quotainfo(mp); + if (xfs_mount_reset_sbqflags(mp)) { + cmn_err(CE_WARN, "XFS quotacheck %s: " + "Failed to reset quota flags.", mp->m_fsname); + } + } else { + cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); + } + return (error); +} + +/* + * This is called after the superblock has been read in and we're ready to + * iget the quota inodes. + */ +STATIC int +xfs_qm_init_quotainos( + xfs_mount_t *mp) +{ + xfs_inode_t *uip, *gip; + int error; + __int64_t sbflags; + uint flags; + + ASSERT(mp->m_quotainfo); + uip = gip = NULL; + sbflags = 0; + flags = 0; + + /* + * Get the uquota and gquota inodes + */ + if (xfs_sb_version_hasquota(&mp->m_sb)) { + if (XFS_IS_UQUOTA_ON(mp) && + mp->m_sb.sb_uquotino != NULLFSINO) { + ASSERT(mp->m_sb.sb_uquotino > 0); + if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, + 0, 0, &uip, 0))) + return XFS_ERROR(error); + } + if (XFS_IS_OQUOTA_ON(mp) && + mp->m_sb.sb_gquotino != NULLFSINO) { + ASSERT(mp->m_sb.sb_gquotino > 0); + if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &gip, 0))) { + if (uip) + IRELE(uip); + return XFS_ERROR(error); + } + } + } else { + flags |= XFS_QMOPT_SBVERSION; + sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS); + } + + /* + * Create the two inodes, if they don't exist already. The changes + * made above will get added to a transaction and logged in one of + * the qino_alloc calls below. If the device is readonly, + * temporarily switch to read-write to do this. + */ + if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { + if ((error = xfs_qm_qino_alloc(mp, &uip, + sbflags | XFS_SB_UQUOTINO, + flags | XFS_QMOPT_UQUOTA))) + return XFS_ERROR(error); + + flags &= ~XFS_QMOPT_SBVERSION; + } + if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { + flags |= (XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + error = xfs_qm_qino_alloc(mp, &gip, + sbflags | XFS_SB_GQUOTINO, flags); + if (error) { + if (uip) + IRELE(uip); + + return XFS_ERROR(error); + } + } + + XFS_QI_UQIP(mp) = uip; + XFS_QI_GQIP(mp) = gip; + + return 0; +} + + +/* + * Traverse the freelist of dquots and attempt to reclaim a maximum of + * 'howmany' dquots. This operation races with dqlookup(), and attempts to + * favor the lookup function ... + * XXXsup merge this with qm_reclaim_one(). + */ +STATIC int +xfs_qm_shake_freelist( + int howmany) +{ + int nreclaimed; + xfs_dqhash_t *hash; + xfs_dquot_t *dqp, *nextdqp; + int restarts; + int nflushes; + + if (howmany <= 0) + return 0; + + nreclaimed = 0; + restarts = 0; + nflushes = 0; + +#ifdef QUOTADEBUG + cmn_err(CE_DEBUG, "Shake free 0x%x", howmany); +#endif + /* lock order is : hashchainlock, freelistlock, mplistlock */ + tryagain: + xfs_qm_freelist_lock(xfs_Gqm); + + for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; + ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) && + nreclaimed < howmany); ) { + xfs_dqlock(dqp); + + /* + * We are racing with dqlookup here. Naturally we don't + * want to reclaim a dquot that lookup wants. + */ + if (dqp->dq_flags & XFS_DQ_WANT) { + xfs_dqunlock(dqp); + xfs_qm_freelist_unlock(xfs_Gqm); + if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) + return nreclaimed; + XQM_STATS_INC(xqmstats.xs_qm_dqwants); + goto tryagain; + } + + /* + * If the dquot is inactive, we are assured that it is + * not on the mplist or the hashlist, and that makes our + * life easier. + */ + if (dqp->dq_flags & XFS_DQ_INACTIVE) { + ASSERT(dqp->q_mount == NULL); + ASSERT(! XFS_DQ_IS_DIRTY(dqp)); + ASSERT(dqp->HL_PREVP == NULL); + ASSERT(dqp->MPL_PREVP == NULL); + XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); + nextdqp = dqp->dq_flnext; + goto off_freelist; + } + + ASSERT(dqp->MPL_PREVP); + /* + * Try to grab the flush lock. If this dquot is in the process of + * getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) { + xfs_dqunlock(dqp); + dqp = dqp->dq_flnext; + continue; + } + + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); + /* + * We flush it delayed write, so don't bother + * releasing the mplock. + */ + error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + if (error) { + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, + "xfs_qm_dqflush_all: dquot %p flush failed", dqp); + } + xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ + dqp = dqp->dq_flnext; + continue; + } + /* + * We're trying to get the hashlock out of order. This races + * with dqlookup; so, we giveup and goto the next dquot if + * we couldn't get the hashlock. This way, we won't starve + * a dqlookup process that holds the hashlock that is + * waiting for the freelist lock. + */ + if (! xfs_qm_dqhashlock_nowait(dqp)) { + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + dqp = dqp->dq_flnext; + continue; + } + /* + * This races with dquot allocation code as well as dqflush_all + * and reclaim code. So, if we failed to grab the mplist lock, + * giveup everything and start over. + */ + hash = dqp->q_hash; + ASSERT(hash); + if (! xfs_qm_mplist_nowait(dqp->q_mount)) { + /* XXX put a sentinel so that we can come back here */ + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + XFS_DQ_HASH_UNLOCK(hash); + xfs_qm_freelist_unlock(xfs_Gqm); + if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) + return nreclaimed; + goto tryagain; + } + xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING"); +#ifdef QUOTADEBUG + cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", + dqp, be32_to_cpu(dqp->q_core.d_id)); +#endif + ASSERT(dqp->q_nrefs == 0); + nextdqp = dqp->dq_flnext; + XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); + XQM_HASHLIST_REMOVE(hash, dqp); + xfs_dqfunlock(dqp); + xfs_qm_mplist_unlock(dqp->q_mount); + XFS_DQ_HASH_UNLOCK(hash); + + off_freelist: + XQM_FREELIST_REMOVE(dqp); + xfs_dqunlock(dqp); + nreclaimed++; + XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims); + xfs_qm_dqdestroy(dqp); + dqp = nextdqp; + } + xfs_qm_freelist_unlock(xfs_Gqm); + return nreclaimed; +} + + +/* + * The kmem_shake interface is invoked when memory is running low. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) +{ + int ndqused, nfree, n; + + if (!kmem_shake_allow(gfp_mask)) + return 0; + if (!xfs_Gqm) + return 0; + + nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */ + /* incore dquots in all f/s's */ + ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; + + ASSERT(ndqused >= 0); + + if (nfree <= ndqused && nfree < ndquot) + return 0; + + ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ + n = nfree - ndqused - ndquot; /* # over target */ + + return xfs_qm_shake_freelist(MAX(nfree, n)); +} + + +/* + * Just pop the least recently used dquot off the freelist and + * recycle it. The returned dquot is locked. + */ +STATIC xfs_dquot_t * +xfs_qm_dqreclaim_one(void) +{ + xfs_dquot_t *dqpout; + xfs_dquot_t *dqp; + int restarts; + int nflushes; + + restarts = 0; + dqpout = NULL; + nflushes = 0; + + /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ + startagain: + xfs_qm_freelist_lock(xfs_Gqm); + + FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) { + xfs_dqlock(dqp); + + /* + * We are racing with dqlookup here. Naturally we don't + * want to reclaim a dquot that lookup wants. We release the + * freelist lock and start over, so that lookup will grab + * both the dquot and the freelistlock. + */ + if (dqp->dq_flags & XFS_DQ_WANT) { + ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); + xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT"); + xfs_dqunlock(dqp); + xfs_qm_freelist_unlock(xfs_Gqm); + if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) + return NULL; + XQM_STATS_INC(xqmstats.xs_qm_dqwants); + goto startagain; + } + + /* + * If the dquot is inactive, we are assured that it is + * not on the mplist or the hashlist, and that makes our + * life easier. + */ + if (dqp->dq_flags & XFS_DQ_INACTIVE) { + ASSERT(dqp->q_mount == NULL); + ASSERT(! XFS_DQ_IS_DIRTY(dqp)); + ASSERT(dqp->HL_PREVP == NULL); + ASSERT(dqp->MPL_PREVP == NULL); + XQM_FREELIST_REMOVE(dqp); + xfs_dqunlock(dqp); + dqpout = dqp; + XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); + break; + } + + ASSERT(dqp->q_hash); + ASSERT(dqp->MPL_PREVP); + + /* + * Try to grab the flush lock. If this dquot is in the process of + * getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) { + xfs_dqunlock(dqp); + continue; + } + + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); + /* + * We flush it delayed write, so don't bother + * releasing the freelist lock. + */ + error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + if (error) { + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, + "xfs_qm_dqreclaim: dquot %p flush failed", dqp); + } + xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ + continue; + } + + if (! xfs_qm_mplist_nowait(dqp->q_mount)) { + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + continue; + } + + if (! xfs_qm_dqhashlock_nowait(dqp)) + goto mplistunlock; + + ASSERT(dqp->q_nrefs == 0); + xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING"); + XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); + XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); + XQM_FREELIST_REMOVE(dqp); + dqpout = dqp; + XFS_DQ_HASH_UNLOCK(dqp->q_hash); + mplistunlock: + xfs_qm_mplist_unlock(dqp->q_mount); + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + if (dqpout) + break; + } + + xfs_qm_freelist_unlock(xfs_Gqm); + return dqpout; +} + + +/*------------------------------------------------------------------*/ + +/* + * Return a new incore dquot. Depending on the number of + * dquots in the system, we either allocate a new one on the kernel heap, + * or reclaim a free one. + * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed + * to reclaim an existing one from the freelist. + */ +boolean_t +xfs_qm_dqalloc_incore( + xfs_dquot_t **O_dqpp) +{ + xfs_dquot_t *dqp; + + /* + * Check against high water mark to see if we want to pop + * a nincompoop dquot off the freelist. + */ + if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { + /* + * Try to recycle a dquot from the freelist. + */ + if ((dqp = xfs_qm_dqreclaim_one())) { + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); + /* + * Just zero the core here. The rest will get + * reinitialized by caller. XXX we shouldn't even + * do this zero ... + */ + memset(&dqp->q_core, 0, sizeof(dqp->q_core)); + *O_dqpp = dqp; + return B_FALSE; + } + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); + } + + /* + * Allocate a brand new dquot on the kernel heap and return it + * to the caller to initialize. + */ + ASSERT(xfs_Gqm->qm_dqzone != NULL); + *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); + atomic_inc(&xfs_Gqm->qm_totaldquots); + + return B_TRUE; +} + + +/* + * Start a transaction and write the incore superblock changes to + * disk. flags parameter indicates which fields have changed. + */ +int +xfs_qm_write_sb_changes( + xfs_mount_t *mp, + __int64_t flags) +{ + xfs_trans_t *tp; + int error; + +#ifdef QUOTADEBUG + cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, + mp->m_sb.sb_sectsize + 128, 0, + 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_mod_sb(tp, flags); + error = xfs_trans_commit(tp, 0); + + return error; +} + + +/* --------------- utility functions for vnodeops ---------------- */ + + +/* + * Given an inode, a uid and gid (from cred_t) make sure that we have + * allocated relevant dquot(s) on disk, and that we won't exceed inode + * quotas by creating this file. + * This also attaches dquot(s) to the given inode after locking it, + * and returns the dquots corresponding to the uid and/or gid. + * + * in : inode (unlocked) + * out : udquot, gdquot with references taken and unlocked + */ +int +xfs_qm_vop_dqalloc( + xfs_mount_t *mp, + xfs_inode_t *ip, + uid_t uid, + gid_t gid, + prid_t prid, + uint flags, + xfs_dquot_t **O_udqpp, + xfs_dquot_t **O_gdqpp) +{ + int error; + xfs_dquot_t *uq, *gq; + uint lockflags; + + if (!XFS_IS_QUOTA_ON(mp)) + return 0; + + lockflags = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockflags); + + if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) + gid = ip->i_d.di_gid; + + /* + * Attach the dquot(s) to this inode, doing a dquot allocation + * if necessary. The dquot(s) will not be locked. + */ + if (XFS_NOT_DQATTACHED(mp, ip)) { + if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC | + XFS_QMOPT_ILOCKED))) { + xfs_iunlock(ip, lockflags); + return error; + } + } + + uq = gq = NULL; + if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { + if (ip->i_d.di_uid != uid) { + /* + * What we need is the dquot that has this uid, and + * if we send the inode to dqget, the uid of the inode + * takes priority over what's sent in the uid argument. + * We must unlock inode here before calling dqget if + * we're not sending the inode, because otherwise + * we'll deadlock by doing trans_reserve while + * holding ilock. + */ + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, + XFS_DQ_USER, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &uq))) { + ASSERT(error != ENOENT); + return error; + } + /* + * Get the ilock in the right order. + */ + xfs_dqunlock(uq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + /* + * Take an extra reference, because we'll return + * this to caller + */ + ASSERT(ip->i_udquot); + uq = ip->i_udquot; + xfs_dqlock(uq); + XFS_DQHOLD(uq); + xfs_dqunlock(uq); + } + } + if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { + if (ip->i_d.di_gid != gid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, + XFS_DQ_GROUP, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return error; + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } + } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { + if (ip->i_d.di_projid != prid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, + XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return (error); + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } + } + if (uq) + xfs_dqtrace_entry_ino(uq, "DQALLOC", ip); + + xfs_iunlock(ip, lockflags); + if (O_udqpp) + *O_udqpp = uq; + else if (uq) + xfs_qm_dqrele(uq); + if (O_gdqpp) + *O_gdqpp = gq; + else if (gq) + xfs_qm_dqrele(gq); + return 0; +} + +/* + * Actually transfer ownership, and do dquot modifications. + * These were already reserved. + */ +xfs_dquot_t * +xfs_qm_vop_chown( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t **IO_olddq, + xfs_dquot_t *newdq) +{ + xfs_dquot_t *prevdq; + uint bfield = XFS_IS_REALTIME_INODE(ip) ? + XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); + + /* old dquot */ + prevdq = *IO_olddq; + ASSERT(prevdq); + ASSERT(prevdq != newdq); + + xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); + xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); + + /* the sparkling new dquot */ + xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); + xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); + + /* + * Take an extra reference, because the inode + * is going to keep this dquot pointer even + * after the trans_commit. + */ + xfs_dqlock(newdq); + XFS_DQHOLD(newdq); + xfs_dqunlock(newdq); + *IO_olddq = newdq; + + return prevdq; +} + +/* + * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). + */ +int +xfs_qm_vop_chown_reserve( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp, + uint flags) +{ + int error; + xfs_mount_t *mp; + uint delblks, blkflags, prjflags = 0; + xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + mp = ip->i_mount; + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + delblks = ip->i_delayed_blks; + delblksudq = delblksgdq = unresudq = unresgdq = NULL; + blkflags = XFS_IS_REALTIME_INODE(ip) ? + XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; + + if (XFS_IS_UQUOTA_ON(mp) && udqp && + ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { + delblksudq = udqp; + /* + * If there are delayed allocation blocks, then we have to + * unreserve those from the old dquot, and add them to the + * new dquot. + */ + if (delblks) { + ASSERT(ip->i_udquot); + unresudq = ip->i_udquot; + } + } + if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { + if (XFS_IS_PQUOTA_ON(ip->i_mount) && + ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) + prjflags = XFS_QMOPT_ENOSPC; + + if (prjflags || + (XFS_IS_GQUOTA_ON(ip->i_mount) && + ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { + delblksgdq = gdqp; + if (delblks) { + ASSERT(ip->i_gdquot); + unresgdq = ip->i_gdquot; + } + } + } + + if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, + delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, + flags | blkflags | prjflags))) + return (error); + + /* + * Do the delayed blks reservations/unreservations now. Since, these + * are done without the help of a transaction, if a reservation fails + * its previous reservations won't be automatically undone by trans + * code. So, we have to do it manually here. + */ + if (delblks) { + /* + * Do the reservations first. Unreservation can't fail. + */ + ASSERT(delblksudq || delblksgdq); + ASSERT(unresudq || unresgdq); + if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, + delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, + flags | blkflags | prjflags))) + return (error); + xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, + unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, + blkflags); + } + + return (0); +} + +int +xfs_qm_vop_rename_dqattach( + xfs_inode_t **i_tab) +{ + xfs_inode_t *ip; + int i; + int error; + + ip = i_tab[0]; + + if (! XFS_IS_QUOTA_ON(ip->i_mount)) + return 0; + + if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + } + for (i = 1; (i < 4 && i_tab[i]); i++) { + /* + * Watch out for duplicate entries in the table. + */ + if ((ip = i_tab[i]) != i_tab[i-1]) { + if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + } + } + } + return 0; +} + +void +xfs_qm_vop_dqattach_and_dqmod_newinode( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp) +{ + if (!XFS_IS_QUOTA_ON(tp->t_mountp)) + return; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); + + if (udqp) { + xfs_dqlock(udqp); + XFS_DQHOLD(udqp); + xfs_dqunlock(udqp); + ASSERT(ip->i_udquot == NULL); + ip->i_udquot = udqp; + ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp)); + ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); + xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); + } + if (gdqp) { + xfs_dqlock(gdqp); + XFS_DQHOLD(gdqp); + xfs_dqunlock(gdqp); + ASSERT(ip->i_gdquot == NULL); + ip->i_gdquot = gdqp; + ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp)); + ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ? + ip->i_d.di_gid : ip->i_d.di_projid) == + be32_to_cpu(gdqp->q_core.d_id)); + xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); + } +} + +/* ------------- list stuff -----------------*/ +STATIC void +xfs_qm_freelist_init(xfs_frlist_t *ql) +{ + ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql; + mutex_init(&ql->qh_lock); + ql->qh_version = 0; + ql->qh_nelems = 0; +} + +STATIC void +xfs_qm_freelist_destroy(xfs_frlist_t *ql) +{ + xfs_dquot_t *dqp, *nextdqp; + + mutex_lock(&ql->qh_lock); + for (dqp = ql->qh_next; + dqp != (xfs_dquot_t *)ql; ) { + xfs_dqlock(dqp); + nextdqp = dqp->dq_flnext; +#ifdef QUOTADEBUG + cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); +#endif + XQM_FREELIST_REMOVE(dqp); + xfs_dqunlock(dqp); + xfs_qm_dqdestroy(dqp); + dqp = nextdqp; + } + mutex_unlock(&ql->qh_lock); + mutex_destroy(&ql->qh_lock); + + ASSERT(ql->qh_nelems == 0); +} + +STATIC void +xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq) +{ + dq->dq_flnext = ql->qh_next; + dq->dq_flprev = (xfs_dquot_t *)ql; + ql->qh_next = dq; + dq->dq_flnext->dq_flprev = dq; + xfs_Gqm->qm_dqfreelist.qh_nelems++; + xfs_Gqm->qm_dqfreelist.qh_version++; +} + +void +xfs_qm_freelist_unlink(xfs_dquot_t *dq) +{ + xfs_dquot_t *next = dq->dq_flnext; + xfs_dquot_t *prev = dq->dq_flprev; + + next->dq_flprev = prev; + prev->dq_flnext = next; + dq->dq_flnext = dq->dq_flprev = dq; + xfs_Gqm->qm_dqfreelist.qh_nelems--; + xfs_Gqm->qm_dqfreelist.qh_version++; +} + +void +xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq) +{ + xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); +} + +STATIC int +xfs_qm_dqhashlock_nowait( + xfs_dquot_t *dqp) +{ + int locked; + + locked = mutex_trylock(&((dqp)->q_hash->qh_lock)); + return locked; +} + +int +xfs_qm_freelist_lock_nowait( + xfs_qm_t *xqm) +{ + int locked; + + locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock)); + return locked; +} + +STATIC int +xfs_qm_mplist_nowait( + xfs_mount_t *mp) +{ + int locked; + + ASSERT(mp->m_quotainfo); + locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp))); + return locked; +} diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h new file mode 100644 index 0000000..44f2534 --- /dev/null +++ b/fs/xfs/quota/xfs_qm.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QM_H__ +#define __XFS_QM_H__ + +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" +#include "xfs_quota_priv.h" +#include "xfs_qm_stats.h" + +struct xfs_qm; +struct xfs_inode; + +extern uint ndquot; +extern mutex_t xfs_Gqm_lock; +extern struct xfs_qm *xfs_Gqm; +extern kmem_zone_t *qm_dqzone; +extern kmem_zone_t *qm_dqtrxzone; + +/* + * Used in xfs_qm_sync called by xfs_sync to count the max times that it can + * iterate over the mountpt's dquot list in one call. + */ +#define XFS_QM_SYNC_MAX_RESTARTS 7 + +/* + * Ditto, for xfs_qm_dqreclaim_one. + */ +#define XFS_QM_RECLAIM_MAX_RESTARTS 4 + +/* + * Ideal ratio of free to in use dquots. Quota manager makes an attempt + * to keep this balance. + */ +#define XFS_QM_DQFREE_RATIO 2 + +/* + * Dquot hashtable constants/threshold values. + */ +#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) + +/* + * This defines the unit of allocation of dquots. + * Currently, it is just one file system block, and a 4K blk contains 30 + * (136 * 30 = 4080) dquots. It's probably not worth trying to make + * this more dynamic. + * XXXsup However, if this number is changed, we have to make sure that we don't + * implicitly assume that we do allocations in chunks of a single filesystem + * block in the dquot/xqm code. + */ +#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 +/* + * When doing a quotacheck, we log dquot clusters of this many FSBs at most + * in a single transaction. We don't want to ask for too huge a log reservation. + */ +#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 + +typedef xfs_dqhash_t xfs_dqlist_t; +/* + * The freelist head. The first two fields match the first two in the + * xfs_dquot_t structure (in xfs_dqmarker_t) + */ +typedef struct xfs_frlist { + struct xfs_dquot *qh_next; + struct xfs_dquot *qh_prev; + mutex_t qh_lock; + uint qh_version; + uint qh_nelems; +} xfs_frlist_t; + +/* + * Quota Manager (global) structure. Lives only in core. + */ +typedef struct xfs_qm { + xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ + xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ + uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ + xfs_frlist_t qm_dqfreelist; /* freelist of dquots */ + atomic_t qm_totaldquots; /* total incore dquots */ + uint qm_nrefs; /* file systems with quota on */ + int qm_dqfree_ratio;/* ratio of free to inuse dquots */ + kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ + kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ +} xfs_qm_t; + +/* + * Various quota information for individual filesystems. + * The mount structure keeps a pointer to this. + */ +typedef struct xfs_quotainfo { + xfs_inode_t *qi_uquotaip; /* user quota inode */ + xfs_inode_t *qi_gquotaip; /* group quota inode */ + spinlock_t qi_pinlock; /* dquot pinning lock */ + xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ + int qi_dqreclaims; /* a change here indicates + a removal in the dqlist */ + time_t qi_btimelimit; /* limit for blks timer */ + time_t qi_itimelimit; /* limit for inodes timer */ + time_t qi_rtbtimelimit;/* limit for rt blks timer */ + xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ + xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ + xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ + mutex_t qi_quotaofflock;/* to serialize quotaoff */ + xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ + uint qi_dqperchunk; /* # ondisk dqs in above chunk */ + xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ + xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ + xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ + xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ + xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ + xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ +} xfs_quotainfo_t; + + +extern xfs_dqtrxops_t xfs_trans_dquot_ops; + +extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); +extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, + xfs_dquot_t *, xfs_dquot_t *, long, long, uint); +extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); +extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); + +/* + * We keep the usr and grp dquots separately so that locking will be easier + * to do at commit time. All transactions that we know of at this point + * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. + */ +#define XFS_QM_TRANS_MAXDQS 2 +typedef struct xfs_dquot_acct { + xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; + xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; +} xfs_dquot_acct_t; + +/* + * Users are allowed to have a usage exceeding their softlimit for + * a period this long. + */ +#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ + +#define XFS_QM_BWARNLIMIT 5 +#define XFS_QM_IWARNLIMIT 5 +#define XFS_QM_RTBWARNLIMIT 5 + +#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock)) +#define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock)) +#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) +#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) + +extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); +extern void xfs_qm_mount_quotas(xfs_mount_t *); +extern int xfs_qm_quotacheck(xfs_mount_t *); +extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); +extern int xfs_qm_unmount_quotas(xfs_mount_t *); +extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); +extern int xfs_qm_sync(xfs_mount_t *, int); + +/* dquot stuff */ +extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); +extern int xfs_qm_dqattach(xfs_inode_t *, uint); +extern void xfs_qm_dqdetach(xfs_inode_t *); +extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); +extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); + +/* vop stuff */ +extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, + uid_t, gid_t, prid_t, uint, + xfs_dquot_t **, xfs_dquot_t **); +extern void xfs_qm_vop_dqattach_and_dqmod_newinode( + xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t *, xfs_dquot_t *); +extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **); +extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t **, xfs_dquot_t *); +extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t *, xfs_dquot_t *, uint); + +/* list stuff */ +extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); +extern void xfs_qm_freelist_unlink(xfs_dquot_t *); +extern int xfs_qm_freelist_lock_nowait(xfs_qm_t *); + +/* system call interface */ +extern int xfs_qm_quotactl(struct xfs_mount *, int, int, + xfs_caddr_t); + +#ifdef DEBUG +extern int xfs_qm_internalqcheck(xfs_mount_t *); +#else +#define xfs_qm_internalqcheck(mp) (0) +#endif + +#endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c new file mode 100644 index 0000000..eea2e60 --- /dev/null +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_clnt.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_btree.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_qm.h" + + +STATIC void +xfs_fill_statvfs_from_dquot( + bhv_statvfs_t *statp, + xfs_disk_dquot_t *dp) +{ + __uint64_t limit; + + limit = dp->d_blk_softlimit ? + be64_to_cpu(dp->d_blk_softlimit) : + be64_to_cpu(dp->d_blk_hardlimit); + if (limit && statp->f_blocks > limit) { + statp->f_blocks = limit; + statp->f_bfree = + (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? + (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; + } + + limit = dp->d_ino_softlimit ? + be64_to_cpu(dp->d_ino_softlimit) : + be64_to_cpu(dp->d_ino_hardlimit); + if (limit && statp->f_files > limit) { + statp->f_files = limit; + statp->f_ffree = + (statp->f_files > be64_to_cpu(dp->d_icount)) ? + (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; + } +} + + +/* + * Directory tree accounting is implemented using project quotas, where + * the project identifier is inherited from parent directories. + * A statvfs (df, etc.) of a directory that is using project quota should + * return a statvfs of the project, not the entire filesystem. + * This makes such trees appear as if they are filesystems in themselves. + */ +STATIC void +xfs_qm_statvfs( + xfs_inode_t *ip, + bhv_statvfs_t *statp) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_dquot_t *dqp; + + if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || + !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + return; + + if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { + xfs_disk_dquot_t *dp = &dqp->q_core; + + xfs_fill_statvfs_from_dquot(statp, dp); + xfs_qm_dqput(dqp); + } +} + +STATIC int +xfs_qm_newmount( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + uint quotaondisk; + uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; + + *quotaflags = 0; + *needquotamount = B_FALSE; + + quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && + (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); + + if (quotaondisk) { + uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT; + pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT; + gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT; + } + + /* + * If the device itself is read-only, we can't allow + * the user to change the state of quota on the mount - + * this would generate a transaction on the ro device, + * which would lead to an I/O error and shutdown + */ + + if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || + (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || + (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || + (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || + (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || + (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && + xfs_dev_is_read_only(mp, "changing quota state")) { + cmn_err(CE_WARN, + "XFS: please mount with%s%s%s%s.", + (!quotaondisk ? "out quota" : ""), + (uquotaondisk ? " usrquota" : ""), + (pquotaondisk ? " prjquota" : ""), + (gquotaondisk ? " grpquota" : "")); + return XFS_ERROR(EPERM); + } + + if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { + /* + * Call mount_quotas at this point only if we won't have to do + * a quotacheck. + */ + if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { + /* + * If an error occured, qm_mount_quotas code + * has already disabled quotas. So, just finish + * mounting, and get on with the boring life + * without disk quotas. + */ + xfs_qm_mount_quotas(mp); + } else { + /* + * Clear the quota flags, but remember them. This + * is so that the quota code doesn't get invoked + * before we're ready. This can happen when an + * inode goes inactive and wants to free blocks, + * or via xfs_log_mount_finish. + */ + *needquotamount = B_TRUE; + *quotaflags = mp->m_qflags; + mp->m_qflags = 0; + } + } + + return 0; +} + +STATIC int +xfs_qm_endmount( + xfs_mount_t *mp, + uint needquotamount, + uint quotaflags) +{ + if (needquotamount) { + ASSERT(mp->m_qflags == 0); + mp->m_qflags = quotaflags; + xfs_qm_mount_quotas(mp); + } + +#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) + if (! (XFS_IS_QUOTA_ON(mp))) + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); + else + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); +#endif + +#ifdef QUOTADEBUG + if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp)) + cmn_err(CE_WARN, "XFS: mount internalqcheck failed"); +#endif + + return 0; +} + +STATIC void +xfs_qm_dqrele_null( + xfs_dquot_t *dq) +{ + /* + * Called from XFS, where we always check first for a NULL dquot. + */ + if (!dq) + return; + xfs_qm_dqrele(dq); +} + + +struct xfs_qmops xfs_qmcore_xfs = { + .xfs_qminit = xfs_qm_newmount, + .xfs_qmdone = xfs_qm_unmount_quotadestroy, + .xfs_qmmount = xfs_qm_endmount, + .xfs_qmunmount = xfs_qm_unmount_quotas, + .xfs_dqrele = xfs_qm_dqrele_null, + .xfs_dqattach = xfs_qm_dqattach, + .xfs_dqdetach = xfs_qm_dqdetach, + .xfs_dqpurgeall = xfs_qm_dqpurge_all, + .xfs_dqvopalloc = xfs_qm_vop_dqalloc, + .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode, + .xfs_dqvoprename = xfs_qm_vop_rename_dqattach, + .xfs_dqvopchown = xfs_qm_vop_chown, + .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, + .xfs_dqstatvfs = xfs_qm_statvfs, + .xfs_dqsync = xfs_qm_sync, + .xfs_quotactl = xfs_qm_quotactl, + .xfs_dqtrxops = &xfs_trans_dquot_ops, +}; +EXPORT_SYMBOL(xfs_qmcore_xfs); + +void __init +xfs_qm_init(void) +{ + printk(KERN_INFO "SGI XFS Quota Management subsystem\n"); + mutex_init(&xfs_Gqm_lock); + xfs_qm_init_procfs(); +} + +void __exit +xfs_qm_exit(void) +{ + xfs_qm_cleanup_procfs(); + if (qm_dqzone) + kmem_zone_destroy(qm_dqzone); + if (qm_dqtrxzone) + kmem_zone_destroy(qm_dqtrxzone); +} diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c new file mode 100644 index 0000000..709f5f5 --- /dev/null +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_btree.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_qm.h" + +struct xqmstats xqmstats; + +STATIC int +xfs_qm_read_xfsquota( + char *buffer, + char **start, + off_t offset, + int count, + int *eof, + void *data) +{ + int len; + + /* maximum; incore; ratio free to inuse; freelist */ + len = sprintf(buffer, "%d\t%d\t%d\t%u\n", + ndquot, + xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, + xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, + xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + + return len; +} + +STATIC int +xfs_qm_read_stats( + char *buffer, + char **start, + off_t offset, + int count, + int *eof, + void *data) +{ + int len; + + /* quota performance statistics */ + len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n", + xqmstats.xs_qm_dqreclaims, + xqmstats.xs_qm_dqreclaim_misses, + xqmstats.xs_qm_dquot_dups, + xqmstats.xs_qm_dqcachemisses, + xqmstats.xs_qm_dqcachehits, + xqmstats.xs_qm_dqwants, + xqmstats.xs_qm_dqshake_reclaims, + xqmstats.xs_qm_dqinact_reclaims); + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + + return len; +} + +void +xfs_qm_init_procfs(void) +{ + create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL); + create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL); +} + +void +xfs_qm_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/xqm", NULL); + remove_proc_entry("fs/xfs/xqmstat", NULL); +} diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h new file mode 100644 index 0000000..5b964fc --- /dev/null +++ b/fs/xfs/quota/xfs_qm_stats.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2002 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QM_STATS_H__ +#define __XFS_QM_STATS_H__ + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +/* + * XQM global statistics + */ +struct xqmstats { + __uint32_t xs_qm_dqreclaims; + __uint32_t xs_qm_dqreclaim_misses; + __uint32_t xs_qm_dquot_dups; + __uint32_t xs_qm_dqcachemisses; + __uint32_t xs_qm_dqcachehits; + __uint32_t xs_qm_dqwants; + __uint32_t xs_qm_dqshake_reclaims; + __uint32_t xs_qm_dqinact_reclaims; +}; + +extern struct xqmstats xqmstats; + +# define XQM_STATS_INC(count) ( (count)++ ) + +extern void xfs_qm_init_procfs(void); +extern void xfs_qm_cleanup_procfs(void); + +#else + +# define XQM_STATS_INC(count) do { } while (0) + +static inline void xfs_qm_init_procfs(void) { }; +static inline void xfs_qm_cleanup_procfs(void) { }; + +#endif + +#endif /* __XFS_QM_STATS_H__ */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c new file mode 100644 index 0000000..1a3b803 --- /dev/null +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -0,0 +1,1476 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <linux/capability.h> + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_btree.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_qm.h" + +#ifdef DEBUG +# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args) +#else +# define qdprintk(s, args...) do { } while (0) +#endif + +STATIC int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); +STATIC int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, + fs_disk_quota_t *); +STATIC int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); +STATIC int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, + fs_disk_quota_t *); +STATIC int xfs_qm_scall_quotaon(xfs_mount_t *, uint); +STATIC int xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t); +STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); +STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, + uint); +STATIC uint xfs_qm_import_flags(uint); +STATIC uint xfs_qm_export_flags(uint); +STATIC uint xfs_qm_import_qtype_flags(uint); +STATIC uint xfs_qm_export_qtype_flags(uint); +STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, + fs_disk_quota_t *); + + +/* + * The main distribution switch of all XFS quotactl system calls. + */ +int +xfs_qm_quotactl( + xfs_mount_t *mp, + int cmd, + int id, + xfs_caddr_t addr) +{ + int error; + + ASSERT(addr != NULL || cmd == Q_XQUOTASYNC); + + /* + * The following commands are valid even when quotaoff. + */ + switch (cmd) { + case Q_XQUOTARM: + /* + * Truncate quota files. quota must be off. + */ + if (XFS_IS_QUOTA_ON(mp)) + return XFS_ERROR(EINVAL); + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + return (xfs_qm_scall_trunc_qfiles(mp, + xfs_qm_import_qtype_flags(*(uint *)addr))); + + case Q_XGETQSTAT: + /* + * Get quota status information. + */ + return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr)); + + case Q_XQUOTAON: + /* + * QUOTAON - enabling quota enforcement. + * Quota accounting must be turned on at mount time. + */ + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + return (xfs_qm_scall_quotaon(mp, + xfs_qm_import_flags(*(uint *)addr))); + + case Q_XQUOTAOFF: + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + break; + + case Q_XQUOTASYNC: + return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL)); + + default: + break; + } + + if (! XFS_IS_QUOTA_ON(mp)) + return XFS_ERROR(ESRCH); + + switch (cmd) { + case Q_XQUOTAOFF: + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + error = xfs_qm_scall_quotaoff(mp, + xfs_qm_import_flags(*(uint *)addr), + B_FALSE); + break; + + case Q_XGETQUOTA: + error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER, + (fs_disk_quota_t *)addr); + break; + case Q_XGETGQUOTA: + error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, + (fs_disk_quota_t *)addr); + break; + case Q_XGETPQUOTA: + error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, + (fs_disk_quota_t *)addr); + break; + + case Q_XSETQLIM: + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER, + (fs_disk_quota_t *)addr); + break; + case Q_XSETGQLIM: + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, + (fs_disk_quota_t *)addr); + break; + case Q_XSETPQLIM: + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, + (fs_disk_quota_t *)addr); + break; + + default: + error = XFS_ERROR(EINVAL); + break; + } + + return (error); +} + +/* + * Turn off quota accounting and/or enforcement for all udquots and/or + * gdquots. Called only at unmount time. + * + * This assumes that there are no dquots of this file system cached + * incore, and modifies the ondisk dquot directly. Therefore, for example, + * it is an error to call this twice, without purging the cache. + */ +STATIC int +xfs_qm_scall_quotaoff( + xfs_mount_t *mp, + uint flags, + boolean_t force) +{ + uint dqtype; + int error; + uint inactivate_flags; + xfs_qoff_logitem_t *qoffstart; + int nculprits; + + if (!force && !capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); + /* + * No file system can have quotas enabled on disk but not in core. + * Note that quota utilities (like quotaoff) _expect_ + * errno == EEXIST here. + */ + if ((mp->m_qflags & flags) == 0) + return XFS_ERROR(EEXIST); + error = 0; + + flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); + + /* + * We don't want to deal with two quotaoffs messing up each other, + * so we're going to serialize it. quotaoff isn't exactly a performance + * critical thing. + * If quotaoff, then we must be dealing with the root filesystem. + */ + ASSERT(mp->m_quotainfo); + if (mp->m_quotainfo) + mutex_lock(&(XFS_QI_QOFFLOCK(mp))); + + ASSERT(mp->m_quotainfo); + + /* + * If we're just turning off quota enforcement, change mp and go. + */ + if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { + mp->m_qflags &= ~(flags); + + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = mp->m_qflags; + spin_unlock(&mp->m_sb_lock); + mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); + + /* XXX what to do if error ? Revert back to old vals incore ? */ + error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); + return (error); + } + + dqtype = 0; + inactivate_flags = 0; + /* + * If accounting is off, we must turn enforcement off, clear the + * quota 'CHKD' certificate to make it known that we have to + * do a quotacheck the next time this quota is turned on. + */ + if (flags & XFS_UQUOTA_ACCT) { + dqtype |= XFS_QMOPT_UQUOTA; + flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD); + inactivate_flags |= XFS_UQUOTA_ACTIVE; + } + if (flags & XFS_GQUOTA_ACCT) { + dqtype |= XFS_QMOPT_GQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_GQUOTA_ACTIVE; + } else if (flags & XFS_PQUOTA_ACCT) { + dqtype |= XFS_QMOPT_PQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_PQUOTA_ACTIVE; + } + + /* + * Nothing to do? Don't complain. This happens when we're just + * turning off quota enforcement. + */ + if ((mp->m_qflags & flags) == 0) { + mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); + return (0); + } + + /* + * Write the LI_QUOTAOFF log record, and do SB changes atomically, + * and synchronously. If we fail to write, we should abort the + * operation as it cannot be recovered safely if we crash. + */ + error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); + if (error) + goto out_error; + + /* + * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct + * to take care of the race between dqget and quotaoff. We don't take + * any special locks to reset these bits. All processes need to check + * these bits *after* taking inode lock(s) to see if the particular + * quota type is in the process of being turned off. If *ACTIVE, it is + * guaranteed that all dquot structures and all quotainode ptrs will all + * stay valid as long as that inode is kept locked. + * + * There is no turning back after this. + */ + mp->m_qflags &= ~inactivate_flags; + + /* + * Give back all the dquot reference(s) held by inodes. + * Here we go thru every single incore inode in this file system, and + * do a dqrele on the i_udquot/i_gdquot that it may have. + * Essentially, as long as somebody has an inode locked, this guarantees + * that quotas will not be turned off. This is handy because in a + * transaction once we lock the inode(s) and check for quotaon, we can + * depend on the quota inodes (and other things) being valid as long as + * we keep the lock(s). + */ + xfs_qm_dqrele_all_inodes(mp, flags); + + /* + * Next we make the changes in the quota flag in the mount struct. + * This isn't protected by a particular lock directly, because we + * don't want to take a mrlock everytime we depend on quotas being on. + */ + mp->m_qflags &= ~(flags); + + /* + * Go through all the dquots of this file system and purge them, + * according to what was turned off. We may not be able to get rid + * of all dquots, because dquots can have temporary references that + * are not attached to inodes. eg. xfs_setattr, xfs_create. + * So, if we couldn't purge all the dquots from the filesystem, + * we can't get rid of the incore data structures. + */ + while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF))) + delay(10 * nculprits); + + /* + * Transactions that had started before ACTIVE state bit was cleared + * could have logged many dquots, so they'd have higher LSNs than + * the first QUOTAOFF log record does. If we happen to crash when + * the tail of the log has gone past the QUOTAOFF record, but + * before the last dquot modification, those dquots __will__ + * recover, and that's not good. + * + * So, we have QUOTAOFF start and end logitems; the start + * logitem won't get overwritten until the end logitem appears... + */ + error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); + if (error) { + /* We're screwed now. Shutdown is the only option. */ + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + goto out_error; + } + + /* + * If quotas is completely disabled, close shop. + */ + if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || + ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { + mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); + xfs_qm_destroy_quotainfo(mp); + return (0); + } + + /* + * Release our quotainode references, and vn_purge them, + * if we don't need them anymore. + */ + if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { + IRELE(XFS_QI_UQIP(mp)); + XFS_QI_UQIP(mp) = NULL; + } + if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { + IRELE(XFS_QI_GQIP(mp)); + XFS_QI_GQIP(mp) = NULL; + } +out_error: + mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); + + return (error); +} + +STATIC int +xfs_qm_scall_trunc_qfiles( + xfs_mount_t *mp, + uint flags) +{ + int error = 0, error2 = 0; + xfs_inode_t *qip; + + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); + if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { + qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); + return XFS_ERROR(EINVAL); + } + + if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { + error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); + if (!error) { + error = xfs_truncate_file(mp, qip); + IRELE(qip); + } + } + + if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && + mp->m_sb.sb_gquotino != NULLFSINO) { + error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); + if (!error2) { + error2 = xfs_truncate_file(mp, qip); + IRELE(qip); + } + } + + return error ? error : error2; +} + + +/* + * Switch on (a given) quota enforcement for a filesystem. This takes + * effect immediately. + * (Switching on quota accounting must be done at mount time.) + */ +STATIC int +xfs_qm_scall_quotaon( + xfs_mount_t *mp, + uint flags) +{ + int error; + uint qf; + uint accflags; + __int64_t sbflags; + + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); + + flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); + /* + * Switching on quota accounting must be done at mount time. + */ + accflags = flags & XFS_ALL_QUOTA_ACCT; + flags &= ~(XFS_ALL_QUOTA_ACCT); + + sbflags = 0; + + if (flags == 0) { + qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags); + return XFS_ERROR(EINVAL); + } + + /* No fs can turn on quotas with a delayed effect */ + ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0); + + /* + * Can't enforce without accounting. We check the superblock + * qflags here instead of m_qflags because rootfs can have + * quota acct on ondisk without m_qflags' knowing. + */ + if (((flags & XFS_UQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && + (flags & XFS_UQUOTA_ENFD)) + || + ((flags & XFS_PQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && + (flags & XFS_GQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && + (flags & XFS_OQUOTA_ENFD))) { + qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", + flags, mp->m_sb.sb_qflags); + return XFS_ERROR(EINVAL); + } + /* + * If everything's upto-date incore, then don't waste time. + */ + if ((mp->m_qflags & flags) == flags) + return XFS_ERROR(EEXIST); + + /* + * Change sb_qflags on disk but not incore mp->qflags + * if this is the root filesystem. + */ + spin_lock(&mp->m_sb_lock); + qf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = qf | flags; + spin_unlock(&mp->m_sb_lock); + + /* + * There's nothing to change if it's the same. + */ + if ((qf & flags) == flags && sbflags == 0) + return XFS_ERROR(EEXIST); + sbflags |= XFS_SB_QFLAGS; + + if ((error = xfs_qm_write_sb_changes(mp, sbflags))) + return (error); + /* + * If we aren't trying to switch on quota enforcement, we are done. + */ + if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) != + (mp->m_qflags & XFS_UQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != + (mp->m_qflags & XFS_PQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != + (mp->m_qflags & XFS_GQUOTA_ACCT)) || + (flags & XFS_ALL_QUOTA_ENFD) == 0) + return (0); + + if (! XFS_IS_QUOTA_RUNNING(mp)) + return XFS_ERROR(ESRCH); + + /* + * Switch on quota enforcement in core. + */ + mutex_lock(&(XFS_QI_QOFFLOCK(mp))); + mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); + mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); + + return (0); +} + + +/* + * Return quota status information, such as uquota-off, enforcements, etc. + */ +STATIC int +xfs_qm_scall_getqstat( + xfs_mount_t *mp, + fs_quota_stat_t *out) +{ + xfs_inode_t *uip, *gip; + boolean_t tempuqip, tempgqip; + + uip = gip = NULL; + tempuqip = tempgqip = B_FALSE; + memset(out, 0, sizeof(fs_quota_stat_t)); + + out->qs_version = FS_QSTAT_VERSION; + if (!xfs_sb_version_hasquota(&mp->m_sb)) { + out->qs_uquota.qfs_ino = NULLFSINO; + out->qs_gquota.qfs_ino = NULLFSINO; + return (0); + } + out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & + (XFS_ALL_QUOTA_ACCT| + XFS_ALL_QUOTA_ENFD)); + out->qs_pad = 0; + out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; + out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; + + if (mp->m_quotainfo) { + uip = mp->m_quotainfo->qi_uquotaip; + gip = mp->m_quotainfo->qi_gquotaip; + } + if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { + if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, + 0, 0, &uip, 0) == 0) + tempuqip = B_TRUE; + } + if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { + if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &gip, 0) == 0) + tempgqip = B_TRUE; + } + if (uip) { + out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; + out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; + if (tempuqip) + IRELE(uip); + } + if (gip) { + out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; + out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; + if (tempgqip) + IRELE(gip); + } + if (mp->m_quotainfo) { + out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); + out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp); + out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp); + out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp); + out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp); + out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp); + } + return (0); +} + +/* + * Adjust quota limits, and start/stop timers accordingly. + */ +STATIC int +xfs_qm_scall_setqlim( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + fs_disk_quota_t *newlim) +{ + xfs_disk_dquot_t *ddq; + xfs_dquot_t *dqp; + xfs_trans_t *tp; + int error; + xfs_qcnt_t hard, soft; + + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); + + if ((newlim->d_fieldmask & + (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) + return (0); + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); + if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, + 0, 0, XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return (error); + } + + /* + * We don't want to race with a quotaoff so take the quotaoff lock. + * (We don't hold an inode lock, so there's nothing else to stop + * a quotaoff from happening). (XXXThis doesn't currently happen + * because we take the vfslock before calling xfs_qm_sysent). + */ + mutex_lock(&(XFS_QI_QOFFLOCK(mp))); + + /* + * Get the dquot (locked), and join it to the transaction. + * Allocate the dquot if this doesn't exist. + */ + if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { + xfs_trans_cancel(tp, XFS_TRANS_ABORT); + mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); + ASSERT(error != ENOENT); + return (error); + } + xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET"); + xfs_trans_dqjoin(tp, dqp); + ddq = &dqp->q_core; + + /* + * Make sure that hardlimits are >= soft limits before changing. + */ + hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : + be64_to_cpu(ddq->d_blk_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : + be64_to_cpu(ddq->d_blk_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_blk_hardlimit = cpu_to_be64(hard); + ddq->d_blk_softlimit = cpu_to_be64(soft); + if (id == 0) { + mp->m_quotainfo->qi_bhardlimit = hard; + mp->m_quotainfo->qi_bsoftlimit = soft; + } + } else { + qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); + } + hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : + be64_to_cpu(ddq->d_rtb_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : + be64_to_cpu(ddq->d_rtb_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_rtb_hardlimit = cpu_to_be64(hard); + ddq->d_rtb_softlimit = cpu_to_be64(soft); + if (id == 0) { + mp->m_quotainfo->qi_rtbhardlimit = hard; + mp->m_quotainfo->qi_rtbsoftlimit = soft; + } + } else { + qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); + } + + hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? + (xfs_qcnt_t) newlim->d_ino_hardlimit : + be64_to_cpu(ddq->d_ino_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? + (xfs_qcnt_t) newlim->d_ino_softlimit : + be64_to_cpu(ddq->d_ino_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_ino_hardlimit = cpu_to_be64(hard); + ddq->d_ino_softlimit = cpu_to_be64(soft); + if (id == 0) { + mp->m_quotainfo->qi_ihardlimit = hard; + mp->m_quotainfo->qi_isoftlimit = soft; + } + } else { + qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); + } + + /* + * Update warnings counter(s) if requested + */ + if (newlim->d_fieldmask & FS_DQ_BWARNS) + ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); + if (newlim->d_fieldmask & FS_DQ_IWARNS) + ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); + + if (id == 0) { + /* + * Timelimits for the super user set the relative time + * the other users can be over quota for this file system. + * If it is zero a default is used. Ditto for the default + * soft and hard limit values (already done, above), and + * for warnings. + */ + if (newlim->d_fieldmask & FS_DQ_BTIMER) { + mp->m_quotainfo->qi_btimelimit = newlim->d_btimer; + ddq->d_btimer = cpu_to_be32(newlim->d_btimer); + } + if (newlim->d_fieldmask & FS_DQ_ITIMER) { + mp->m_quotainfo->qi_itimelimit = newlim->d_itimer; + ddq->d_itimer = cpu_to_be32(newlim->d_itimer); + } + if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { + mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer; + ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); + } + if (newlim->d_fieldmask & FS_DQ_BWARNS) + mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns; + if (newlim->d_fieldmask & FS_DQ_IWARNS) + mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns; + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns; + } else { + /* + * If the user is now over quota, start the timelimit. + * The user will not be 'warned'. + * Note that we keep the timers ticking, whether enforcement + * is on or off. We don't really want to bother with iterating + * over all ondisk dquots and turning the timers on/off. + */ + xfs_qm_adjust_dqtimers(mp, ddq); + } + dqp->dq_flags |= XFS_DQ_DIRTY; + xfs_trans_log_dquot(tp, dqp); + + xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); + error = xfs_trans_commit(tp, 0); + xfs_qm_dqprint(dqp); + xfs_qm_dqrele(dqp); + mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); + + return error; +} + +STATIC int +xfs_qm_scall_getquota( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + fs_disk_quota_t *out) +{ + xfs_dquot_t *dqp; + int error; + + /* + * Try to get the dquot. We don't want it allocated on disk, so + * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't + * exist, we'll get ENOENT back. + */ + if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) { + return (error); + } + + xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS"); + /* + * If everything's NULL, this dquot doesn't quite exist as far as + * our utility programs are concerned. + */ + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_qm_dqput(dqp); + return XFS_ERROR(ENOENT); + } + /* xfs_qm_dqprint(dqp); */ + /* + * Convert the disk dquot to the exportable format + */ + xfs_qm_export_dquot(mp, &dqp->q_core, out); + xfs_qm_dqput(dqp); + return (error ? XFS_ERROR(EFAULT) : 0); +} + + +STATIC int +xfs_qm_log_quotaoff_end( + xfs_mount_t *mp, + xfs_qoff_logitem_t *startqoff, + uint flags) +{ + xfs_trans_t *tp; + int error; + xfs_qoff_logitem_t *qoffi; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); + + if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, + 0, 0, XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return (error); + } + + qoffi = xfs_trans_get_qoff_item(tp, startqoff, + flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + return (error); +} + + +STATIC int +xfs_qm_log_quotaoff( + xfs_mount_t *mp, + xfs_qoff_logitem_t **qoffstartp, + uint flags) +{ + xfs_trans_t *tp; + int error; + xfs_qoff_logitem_t *qoffi=NULL; + uint oldsbqflag=0; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); + if ((error = xfs_trans_reserve(tp, 0, + sizeof(xfs_qoff_logitem_t) * 2 + + mp->m_sb.sb_sectsize + 128, + 0, + 0, + XFS_DEFAULT_LOG_COUNT))) { + goto error0; + } + + qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + spin_lock(&mp->m_sb_lock); + oldsbqflag = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + xfs_mod_sb(tp, XFS_SB_QFLAGS); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + +error0: + if (error) { + xfs_trans_cancel(tp, 0); + /* + * No one else is modifying sb_qflags, so this is OK. + * We still hold the quotaofflock. + */ + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = oldsbqflag; + spin_unlock(&mp->m_sb_lock); + } + *qoffstartp = qoffi; + return (error); +} + + +/* + * Translate an internal style on-disk-dquot to the exportable format. + * The main differences are that the counters/limits are all in Basic + * Blocks (BBs) instead of the internal FSBs, and all on-disk data has + * to be converted to the native endianness. + */ +STATIC void +xfs_qm_export_dquot( + xfs_mount_t *mp, + xfs_disk_dquot_t *src, + struct fs_disk_quota *dst) +{ + memset(dst, 0, sizeof(*dst)); + dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ + dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); + dst->d_id = be32_to_cpu(src->d_id); + dst->d_blk_hardlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); + dst->d_blk_softlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); + dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); + dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); + dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); + dst->d_icount = be64_to_cpu(src->d_icount); + dst->d_btimer = be32_to_cpu(src->d_btimer); + dst->d_itimer = be32_to_cpu(src->d_itimer); + dst->d_iwarns = be16_to_cpu(src->d_iwarns); + dst->d_bwarns = be16_to_cpu(src->d_bwarns); + dst->d_rtb_hardlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); + dst->d_rtb_softlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); + dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); + dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); + dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); + + /* + * Internally, we don't reset all the timers when quota enforcement + * gets turned off. No need to confuse the user level code, + * so return zeroes in that case. + */ + if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || + (!XFS_IS_OQUOTA_ENFORCED(mp) && + (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { + dst->d_btimer = 0; + dst->d_itimer = 0; + dst->d_rtbtimer = 0; + } + +#ifdef DEBUG + if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || + (XFS_IS_OQUOTA_ENFORCED(mp) && + (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && + dst->d_id != 0) { + if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && + (dst->d_blk_softlimit > 0)) { + ASSERT(dst->d_btimer != 0); + } + if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && + (dst->d_ino_softlimit > 0)) { + ASSERT(dst->d_itimer != 0); + } + } +#endif +} + +STATIC uint +xfs_qm_import_qtype_flags( + uint uflags) +{ + uint oflags = 0; + + /* + * Can't be more than one, or none. + */ + if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == + (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) || + ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) == + (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) || + ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) == + (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) || + ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0)) + return (0); + + oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0; + oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0; + oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0; + return oflags; +} + +STATIC uint +xfs_qm_export_qtype_flags( + uint flags) +{ + /* + * Can't be more than one, or none. + */ + ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != + (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); + ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != + (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); + ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != + (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); + ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); + + return (flags & XFS_DQ_USER) ? + XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? + XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; +} + +STATIC uint +xfs_qm_import_flags( + uint uflags) +{ + uint flags = 0; + + if (uflags & XFS_QUOTA_UDQ_ACCT) + flags |= XFS_UQUOTA_ACCT; + if (uflags & XFS_QUOTA_PDQ_ACCT) + flags |= XFS_PQUOTA_ACCT; + if (uflags & XFS_QUOTA_GDQ_ACCT) + flags |= XFS_GQUOTA_ACCT; + if (uflags & XFS_QUOTA_UDQ_ENFD) + flags |= XFS_UQUOTA_ENFD; + if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) + flags |= XFS_OQUOTA_ENFD; + return (flags); +} + + +STATIC uint +xfs_qm_export_flags( + uint flags) +{ + uint uflags; + + uflags = 0; + if (flags & XFS_UQUOTA_ACCT) + uflags |= XFS_QUOTA_UDQ_ACCT; + if (flags & XFS_PQUOTA_ACCT) + uflags |= XFS_QUOTA_PDQ_ACCT; + if (flags & XFS_GQUOTA_ACCT) + uflags |= XFS_QUOTA_GDQ_ACCT; + if (flags & XFS_UQUOTA_ENFD) + uflags |= XFS_QUOTA_UDQ_ENFD; + if (flags & (XFS_OQUOTA_ENFD)) { + uflags |= (flags & XFS_GQUOTA_ACCT) ? + XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; + } + return (uflags); +} + + +/* + * Go thru all the inodes in the file system, releasing their dquots. + * Note that the mount structure gets modified to indicate that quotas are off + * AFTER this, in the case of quotaoff. This also gets called from + * xfs_rootumount. + */ +void +xfs_qm_dqrele_all_inodes( + struct xfs_mount *mp, + uint flags) +{ + xfs_inode_t *ip, *topino; + uint ireclaims; + struct inode *vp; + boolean_t vnode_refd; + + ASSERT(mp->m_quotainfo); + + XFS_MOUNT_ILOCK(mp); +again: + ip = mp->m_inodes; + if (ip == NULL) { + XFS_MOUNT_IUNLOCK(mp); + return; + } + do { + /* Skip markers inserted by xfs_sync */ + if (ip->i_mount == NULL) { + ip = ip->i_mnext; + continue; + } + /* Root inode, rbmip and rsumip have associated blocks */ + if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); + ip = ip->i_mnext; + continue; + } + vp = VFS_I(ip); + if (!vp) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); + ip = ip->i_mnext; + continue; + } + vnode_refd = B_FALSE; + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { + ireclaims = mp->m_ireclaims; + topino = mp->m_inodes; + vp = vn_grab(vp); + if (!vp) + goto again; + + XFS_MOUNT_IUNLOCK(mp); + /* XXX restart limit ? */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + vnode_refd = B_TRUE; + } else { + ireclaims = mp->m_ireclaims; + topino = mp->m_inodes; + XFS_MOUNT_IUNLOCK(mp); + } + + /* + * We don't keep the mountlock across the dqrele() call, + * since it can take a while.. + */ + if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * Wait until we've dropped the ilock and mountlock to + * do the vn_rele. Or be condemned to an eternity in the + * inactive code in hell. + */ + if (vnode_refd) + IRELE(ip); + XFS_MOUNT_ILOCK(mp); + /* + * If an inode was inserted or removed, we gotta + * start over again. + */ + if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { + /* XXX use a sentinel */ + goto again; + } + ip = ip->i_mnext; + } while (ip != mp->m_inodes); + + XFS_MOUNT_IUNLOCK(mp); +} + +/*------------------------------------------------------------------------*/ +#ifdef DEBUG +/* + * This contains all the test functions for XFS disk quotas. + * Currently it does a quota accounting check. ie. it walks through + * all inodes in the file system, calculating the dquot accounting fields, + * and prints out any inconsistencies. + */ +xfs_dqhash_t *qmtest_udqtab; +xfs_dqhash_t *qmtest_gdqtab; +int qmtest_hashmask; +int qmtest_nfails; +mutex_t qcheck_lock; + +#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ + (__psunsigned_t)(id)) & \ + (qmtest_hashmask - 1)) + +#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \ + (qmtest_udqtab + \ + DQTEST_HASHVAL(mp, id)) : \ + (qmtest_gdqtab + \ + DQTEST_HASHVAL(mp, id))) + +#define DQTEST_LIST_PRINT(l, NXT, title) \ +{ \ + xfs_dqtest_t *dqp; int i = 0;\ + cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ + for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ + dqp = (xfs_dqtest_t *)dqp->NXT) { \ + cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ + ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ + dqp->d_bcount, dqp->d_icount); } \ +} + +typedef struct dqtest { + xfs_dqmarker_t q_lists; + xfs_dqhash_t *q_hash; /* the hashchain header */ + xfs_mount_t *q_mount; /* filesystem this relates to */ + xfs_dqid_t d_id; /* user id or group id */ + xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */ + xfs_qcnt_t d_icount; /* # inodes owned by the user */ +} xfs_dqtest_t; + +STATIC void +xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) +{ + xfs_dquot_t *d; + if (((d) = (h)->qh_next)) + (d)->HL_PREVP = &((dqp)->HL_NEXT); + (dqp)->HL_NEXT = d; + (dqp)->HL_PREVP = &((h)->qh_next); + (h)->qh_next = (xfs_dquot_t *)dqp; + (h)->qh_version++; + (h)->qh_nelems++; +} +STATIC void +xfs_qm_dqtest_print( + xfs_dqtest_t *d) +{ + cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); + cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); + cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); + cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", + d->d_bcount, (int)d->d_bcount); + cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", + d->d_icount, (int)d->d_icount); + cmn_err(CE_DEBUG, "---------------------------"); +} + +STATIC void +xfs_qm_dqtest_failed( + xfs_dqtest_t *d, + xfs_dquot_t *dqp, + char *reason, + xfs_qcnt_t a, + xfs_qcnt_t b, + int error) +{ + qmtest_nfails++; + if (error) + cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s", + d->d_id, error, reason); + else + cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]", + d->d_id, reason, (int)a, (int)b); + xfs_qm_dqtest_print(d); + if (dqp) + xfs_qm_dqprint(dqp); +} + +STATIC int +xfs_dqtest_cmp2( + xfs_dqtest_t *d, + xfs_dquot_t *dqp) +{ + int err = 0; + if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) { + xfs_qm_dqtest_failed(d, dqp, "icount mismatch", + be64_to_cpu(dqp->q_core.d_icount), + d->d_icount, 0); + err++; + } + if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) { + xfs_qm_dqtest_failed(d, dqp, "bcount mismatch", + be64_to_cpu(dqp->q_core.d_bcount), + d->d_bcount, 0); + err++; + } + if (dqp->q_core.d_blk_softlimit && + be64_to_cpu(dqp->q_core.d_bcount) >= + be64_to_cpu(dqp->q_core.d_blk_softlimit)) { + if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { + cmn_err(CE_DEBUG, + "%d [%s] [0x%p] BLK TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); + err++; + } + } + if (dqp->q_core.d_ino_softlimit && + be64_to_cpu(dqp->q_core.d_icount) >= + be64_to_cpu(dqp->q_core.d_ino_softlimit)) { + if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { + cmn_err(CE_DEBUG, + "%d [%s] [0x%p] INO TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); + err++; + } + } +#ifdef QUOTADEBUG + if (!err) { + cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", + d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); + } +#endif + return (err); +} + +STATIC void +xfs_dqtest_cmp( + xfs_dqtest_t *d) +{ + xfs_dquot_t *dqp; + int error; + + /* xfs_qm_dqtest_print(d); */ + if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0, + &dqp))) { + xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error); + return; + } + xfs_dqtest_cmp2(d, dqp); + xfs_qm_dqput(dqp); +} + +STATIC int +xfs_qm_internalqcheck_dqget( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + xfs_dqtest_t **O_dq) +{ + xfs_dqtest_t *d; + xfs_dqhash_t *h; + + h = DQTEST_HASH(mp, id, type); + for (d = (xfs_dqtest_t *) h->qh_next; d != NULL; + d = (xfs_dqtest_t *) d->HL_NEXT) { + /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */ + if (d->d_id == id && mp == d->q_mount) { + *O_dq = d; + return (0); + } + } + d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP); + d->dq_flags = type; + d->d_id = id; + d->q_mount = mp; + d->q_hash = h; + xfs_qm_hashinsert(h, d); + *O_dq = d; + return (0); +} + +STATIC void +xfs_qm_internalqcheck_get_dquots( + xfs_mount_t *mp, + xfs_dqid_t uid, + xfs_dqid_t projid, + xfs_dqid_t gid, + xfs_dqtest_t **ud, + xfs_dqtest_t **gd) +{ + if (XFS_IS_UQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud); + if (XFS_IS_GQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd); + else if (XFS_IS_PQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd); +} + + +STATIC void +xfs_qm_internalqcheck_dqadjust( + xfs_inode_t *ip, + xfs_dqtest_t *d) +{ + d->d_icount++; + d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks; +} + +STATIC int +xfs_qm_internalqcheck_adjust( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* not used */ + int ubsize, /* not used */ + void *private_data, /* not used */ + xfs_daddr_t bno, /* starting block of inode cluster */ + int *ubused, /* not used */ + void *dip, /* not used */ + int *res) /* bulkstat result code */ +{ + xfs_inode_t *ip; + xfs_dqtest_t *ud, *gd; + uint lock_flags; + boolean_t ipreleased; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { + *res = BULKSTAT_RV_NOTHING; + qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n", + (unsigned long long) ino, + (unsigned long long) mp->m_sb.sb_uquotino, + (unsigned long long) mp->m_sb.sb_gquotino); + return XFS_ERROR(EINVAL); + } + ipreleased = B_FALSE; + again: + lock_flags = XFS_ILOCK_SHARED; + if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { + *res = BULKSTAT_RV_NOTHING; + return (error); + } + + /* + * This inode can have blocks after eof which can get released + * when we send it to inactive. Since we don't check the dquot + * until the after all our calculations are done, we must get rid + * of those now. + */ + if (! ipreleased) { + xfs_iput(ip, lock_flags); + ipreleased = B_TRUE; + goto again; + } + xfs_qm_internalqcheck_get_dquots(mp, + (xfs_dqid_t) ip->i_d.di_uid, + (xfs_dqid_t) ip->i_d.di_projid, + (xfs_dqid_t) ip->i_d.di_gid, + &ud, &gd); + if (XFS_IS_UQUOTA_ON(mp)) { + ASSERT(ud); + xfs_qm_internalqcheck_dqadjust(ip, ud); + } + if (XFS_IS_OQUOTA_ON(mp)) { + ASSERT(gd); + xfs_qm_internalqcheck_dqadjust(ip, gd); + } + xfs_iput(ip, lock_flags); + *res = BULKSTAT_RV_DIDONE; + return (0); +} + + +/* PRIVATE, debugging */ +int +xfs_qm_internalqcheck( + xfs_mount_t *mp) +{ + xfs_ino_t lastino; + int done, count; + int i; + xfs_dqtest_t *d, *e; + xfs_dqhash_t *h1; + int error; + + lastino = 0; + qmtest_hashmask = 32; + count = 5; + done = 0; + qmtest_nfails = 0; + + if (! XFS_IS_QUOTA_ON(mp)) + return XFS_ERROR(ESRCH); + + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + XFS_bflush(mp->m_ddev_targp); + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + XFS_bflush(mp->m_ddev_targp); + + mutex_lock(&qcheck_lock); + /* There should be absolutely no quota activity while this + is going on. */ + qmtest_udqtab = kmem_zalloc(qmtest_hashmask * + sizeof(xfs_dqhash_t), KM_SLEEP); + qmtest_gdqtab = kmem_zalloc(qmtest_hashmask * + sizeof(xfs_dqhash_t), KM_SLEEP); + do { + /* + * Iterate thru all the inodes in the file system, + * adjusting the corresponding dquot counters + */ + if ((error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_internalqcheck_adjust, NULL, + 0, NULL, BULKSTAT_FG_IGET, &done))) { + break; + } + } while (! done); + if (error) { + cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); + } + cmn_err(CE_DEBUG, "Checking results against system dquots"); + for (i = 0; i < qmtest_hashmask; i++) { + h1 = &qmtest_udqtab[i]; + for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { + xfs_dqtest_cmp(d); + e = (xfs_dqtest_t *) d->HL_NEXT; + kmem_free(d); + d = e; + } + h1 = &qmtest_gdqtab[i]; + for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { + xfs_dqtest_cmp(d); + e = (xfs_dqtest_t *) d->HL_NEXT; + kmem_free(d); + d = e; + } + } + + if (qmtest_nfails) { + cmn_err(CE_DEBUG, "******** quotacheck failed ********"); + cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails); + } else { + cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); + } + kmem_free(qmtest_udqtab); + kmem_free(qmtest_gdqtab); + mutex_unlock(&qcheck_lock); + return (qmtest_nfails); +} + +#endif /* DEBUG */ diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h new file mode 100644 index 0000000..c4fcea6 --- /dev/null +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QUOTA_PRIV_H__ +#define __XFS_QUOTA_PRIV_H__ + +/* + * Number of bmaps that we ask from bmapi when doing a quotacheck. + * We make this restriction to keep the memory usage to a minimum. + */ +#define XFS_DQITER_MAP_SIZE 10 + +/* Number of dquots that fit in to a dquot block */ +#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk) + +#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t)) + +#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims) +#define XFS_QI_UQIP(mp) ((mp)->m_quotainfo->qi_uquotaip) +#define XFS_QI_GQIP(mp) ((mp)->m_quotainfo->qi_gquotaip) +#define XFS_QI_DQCHUNKLEN(mp) ((mp)->m_quotainfo->qi_dqchunklen) +#define XFS_QI_BTIMELIMIT(mp) ((mp)->m_quotainfo->qi_btimelimit) +#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit) +#define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit) +#define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit) +#define XFS_QI_RTBWARNLIMIT(mp) ((mp)->m_quotainfo->qi_rtbwarnlimit) +#define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit) +#define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock) + +#define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist) +#define XFS_QI_MPLLOCK(mp) ((mp)->m_quotainfo->qi_dqlist.qh_lock) +#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next) +#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems) + +#define XQMLCK(h) (mutex_lock(&((h)->qh_lock))) +#define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock))) +#ifdef DEBUG +struct xfs_dqhash; +static inline int XQMISLCKD(struct xfs_dqhash *h) +{ + if (mutex_trylock(&h->qh_lock)) { + mutex_unlock(&h->qh_lock); + return 0; + } + return 1; +} +#endif + +#define XFS_DQ_HASH_LOCK(h) XQMLCK(h) +#define XFS_DQ_HASH_UNLOCK(h) XQMUNLCK(h) +#define XFS_DQ_IS_HASH_LOCKED(h) XQMISLCKD(h) + +#define xfs_qm_mplist_lock(mp) XQMLCK(&(XFS_QI_MPL_LIST(mp))) +#define xfs_qm_mplist_unlock(mp) XQMUNLCK(&(XFS_QI_MPL_LIST(mp))) +#define XFS_QM_IS_MPLIST_LOCKED(mp) XQMISLCKD(&(XFS_QI_MPL_LIST(mp))) + +#define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist)) +#define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist)) + +/* + * Hash into a bucket in the dquot hash table, based on <mp, id>. + */ +#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ + (__psunsigned_t)(id)) & \ + (xfs_Gqm->qm_dqhashmask - 1)) +#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \ + (xfs_Gqm->qm_usr_dqhtable + \ + XFS_DQ_HASHVAL(mp, id)) : \ + (xfs_Gqm->qm_grp_dqhtable + \ + XFS_DQ_HASHVAL(mp, id))) +#define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? \ + XFS_IS_UQUOTA_ON(mp) : \ + XFS_IS_OQUOTA_ON(mp)) +#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ + !dqp->q_core.d_blk_hardlimit && \ + !dqp->q_core.d_blk_softlimit && \ + !dqp->q_core.d_rtb_hardlimit && \ + !dqp->q_core.d_rtb_softlimit && \ + !dqp->q_core.d_ino_hardlimit && \ + !dqp->q_core.d_ino_softlimit && \ + !dqp->q_core.d_bcount && \ + !dqp->q_core.d_rtbcount && \ + !dqp->q_core.d_icount) + +#define HL_PREVP dq_hashlist.ql_prevp +#define HL_NEXT dq_hashlist.ql_next +#define MPL_PREVP dq_mplist.ql_prevp +#define MPL_NEXT dq_mplist.ql_next + + +#define _LIST_REMOVE(h, dqp, PVP, NXT) \ + { \ + xfs_dquot_t *d; \ + if (((d) = (dqp)->NXT)) \ + (d)->PVP = (dqp)->PVP; \ + *((dqp)->PVP) = d; \ + (dqp)->NXT = NULL; \ + (dqp)->PVP = NULL; \ + (h)->qh_version++; \ + (h)->qh_nelems--; \ + } + +#define _LIST_INSERT(h, dqp, PVP, NXT) \ + { \ + xfs_dquot_t *d; \ + if (((d) = (h)->qh_next)) \ + (d)->PVP = &((dqp)->NXT); \ + (dqp)->NXT = d; \ + (dqp)->PVP = &((h)->qh_next); \ + (h)->qh_next = dqp; \ + (h)->qh_version++; \ + (h)->qh_nelems++; \ + } + +#define FOREACH_DQUOT_IN_MP(dqp, mp) \ + for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT) + +#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \ +for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ + (dqp) = (dqp)->dq_flnext) + +#define XQM_HASHLIST_INSERT(h, dqp) \ + _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT) + +#define XQM_FREELIST_INSERT(h, dqp) \ + xfs_qm_freelist_append(h, dqp) + +#define XQM_MPLIST_INSERT(h, dqp) \ + _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT) + +#define XQM_HASHLIST_REMOVE(h, dqp) \ + _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT) +#define XQM_FREELIST_REMOVE(dqp) \ + xfs_qm_freelist_unlink(dqp) +#define XQM_MPLIST_REMOVE(h, dqp) \ + { _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \ + XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; } + +#define XFS_DQ_IS_LOGITEM_INITD(dqp) ((dqp)->q_logitem.qli_dquot == (dqp)) + +#define XFS_QM_DQP_TO_DQACCT(tp, dqp) (XFS_QM_ISUDQ(dqp) ? \ + (tp)->t_dqinfo->dqa_usrdquots : \ + (tp)->t_dqinfo->dqa_grpdquots) +#define XFS_IS_SUSER_DQUOT(dqp) \ + (!((dqp)->q_core.d_id)) + +#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ + (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ + (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) + +#endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c new file mode 100644 index 0000000..9961138 --- /dev/null +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -0,0 +1,923 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_btree.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" + +STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); + +/* + * Add the locked dquot to the transaction. + * The dquot must be locked, and it cannot be associated with any + * transaction. + */ +void +xfs_trans_dqjoin( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + xfs_dq_logitem_t *lp; + + ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp)); + lp = &dqp->q_logitem; + + /* + * Get a log_item_desc to point at the new item. + */ + (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp)); + + /* + * Initialize i_transp so we can later determine if this dquot is + * associated with this transaction. + */ + dqp->q_transp = tp; +} + + +/* + * This is called to mark the dquot as needing + * to be logged when the transaction is committed. The dquot must + * already be associated with the given transaction. + * Note that it marks the entire transaction as dirty. In the ordinary + * case, this gets called via xfs_trans_commit, after the transaction + * is already dirty. However, there's nothing stop this from getting + * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY + * flag. + */ +void +xfs_trans_log_dquot( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + xfs_log_item_desc_t *lidp; + + ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem)); + ASSERT(lidp != NULL); + + tp->t_flags |= XFS_TRANS_DIRTY; + lidp->lid_flags |= XFS_LID_DIRTY; +} + +/* + * Carry forward whatever is left of the quota blk reservation to + * the spanky new transaction + */ +STATIC void +xfs_trans_dup_dqinfo( + xfs_trans_t *otp, + xfs_trans_t *ntp) +{ + xfs_dqtrx_t *oq, *nq; + int i,j; + xfs_dqtrx_t *oqa, *nqa; + + if (!otp->t_dqinfo) + return; + + xfs_trans_alloc_dqinfo(ntp); + oqa = otp->t_dqinfo->dqa_usrdquots; + nqa = ntp->t_dqinfo->dqa_usrdquots; + + /* + * Because the quota blk reservation is carried forward, + * it is also necessary to carry forward the DQ_DIRTY flag. + */ + if(otp->t_flags & XFS_TRANS_DQ_DIRTY) + ntp->t_flags |= XFS_TRANS_DQ_DIRTY; + + for (j = 0; j < 2; j++) { + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + if (oqa[i].qt_dquot == NULL) + break; + oq = &oqa[i]; + nq = &nqa[i]; + + nq->qt_dquot = oq->qt_dquot; + nq->qt_bcount_delta = nq->qt_icount_delta = 0; + nq->qt_rtbcount_delta = 0; + + /* + * Transfer whatever is left of the reservations. + */ + nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; + oq->qt_blk_res = oq->qt_blk_res_used; + + nq->qt_rtblk_res = oq->qt_rtblk_res - + oq->qt_rtblk_res_used; + oq->qt_rtblk_res = oq->qt_rtblk_res_used; + + nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used; + oq->qt_ino_res = oq->qt_ino_res_used; + + } + oqa = otp->t_dqinfo->dqa_grpdquots; + nqa = ntp->t_dqinfo->dqa_grpdquots; + } +} + +/* + * Wrap around mod_dquot to account for both user and group quotas. + */ +STATIC void +xfs_trans_mod_dquot_byino( + xfs_trans_t *tp, + xfs_inode_t *ip, + uint field, + long delta) +{ + xfs_mount_t *mp; + + ASSERT(tp); + mp = tp->t_mountp; + + if (!XFS_IS_QUOTA_ON(mp) || + ip->i_ino == mp->m_sb.sb_uquotino || + ip->i_ino == mp->m_sb.sb_gquotino) + return; + + if (tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + + if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) + (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); + if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) + (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); +} + +STATIC xfs_dqtrx_t * +xfs_trans_get_dqtrx( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + int i; + xfs_dqtrx_t *qa; + + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + qa = XFS_QM_DQP_TO_DQACCT(tp, dqp); + + if (qa[i].qt_dquot == NULL || + qa[i].qt_dquot == dqp) { + return (&qa[i]); + } + } + + return (NULL); +} + +/* + * Make the changes in the transaction structure. + * The moral equivalent to xfs_trans_mod_sb(). + * We don't touch any fields in the dquot, so we don't care + * if it's locked or not (most of the time it won't be). + */ +void +xfs_trans_mod_dquot( + xfs_trans_t *tp, + xfs_dquot_t *dqp, + uint field, + long delta) +{ + xfs_dqtrx_t *qtrx; + + ASSERT(tp); + qtrx = NULL; + + if (tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + /* + * Find either the first free slot or the slot that belongs + * to this dquot. + */ + qtrx = xfs_trans_get_dqtrx(tp, dqp); + ASSERT(qtrx); + if (qtrx->qt_dquot == NULL) + qtrx->qt_dquot = dqp; + + switch (field) { + + /* + * regular disk blk reservation + */ + case XFS_TRANS_DQ_RES_BLKS: + qtrx->qt_blk_res += (ulong)delta; + break; + + /* + * inode reservation + */ + case XFS_TRANS_DQ_RES_INOS: + qtrx->qt_ino_res += (ulong)delta; + break; + + /* + * disk blocks used. + */ + case XFS_TRANS_DQ_BCOUNT: + if (qtrx->qt_blk_res && delta > 0) { + qtrx->qt_blk_res_used += (ulong)delta; + ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used); + } + qtrx->qt_bcount_delta += delta; + break; + + case XFS_TRANS_DQ_DELBCOUNT: + qtrx->qt_delbcnt_delta += delta; + break; + + /* + * Inode Count + */ + case XFS_TRANS_DQ_ICOUNT: + if (qtrx->qt_ino_res && delta > 0) { + qtrx->qt_ino_res_used += (ulong)delta; + ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); + } + qtrx->qt_icount_delta += delta; + break; + + /* + * rtblk reservation + */ + case XFS_TRANS_DQ_RES_RTBLKS: + qtrx->qt_rtblk_res += (ulong)delta; + break; + + /* + * rtblk count + */ + case XFS_TRANS_DQ_RTBCOUNT: + if (qtrx->qt_rtblk_res && delta > 0) { + qtrx->qt_rtblk_res_used += (ulong)delta; + ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used); + } + qtrx->qt_rtbcount_delta += delta; + break; + + case XFS_TRANS_DQ_DELRTBCOUNT: + qtrx->qt_delrtb_delta += delta; + break; + + default: + ASSERT(0); + } + tp->t_flags |= XFS_TRANS_DQ_DIRTY; +} + + +/* + * Given an array of dqtrx structures, lock all the dquots associated + * and join them to the transaction, provided they have been modified. + * We know that the highest number of dquots (of one type - usr OR grp), + * involved in a transaction is 2 and that both usr and grp combined - 3. + * So, we don't attempt to make this very generic. + */ +STATIC void +xfs_trans_dqlockedjoin( + xfs_trans_t *tp, + xfs_dqtrx_t *q) +{ + ASSERT(q[0].qt_dquot != NULL); + if (q[1].qt_dquot == NULL) { + xfs_dqlock(q[0].qt_dquot); + xfs_trans_dqjoin(tp, q[0].qt_dquot); + } else { + ASSERT(XFS_QM_TRANS_MAXDQS == 2); + xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot); + xfs_trans_dqjoin(tp, q[0].qt_dquot); + xfs_trans_dqjoin(tp, q[1].qt_dquot); + } +} + + +/* + * Called by xfs_trans_commit() and similar in spirit to + * xfs_trans_apply_sb_deltas(). + * Go thru all the dquots belonging to this transaction and modify the + * INCORE dquot to reflect the actual usages. + * Unreserve just the reservations done by this transaction. + * dquot is still left locked at exit. + */ +STATIC void +xfs_trans_apply_dquot_deltas( + xfs_trans_t *tp) +{ + int i, j; + xfs_dquot_t *dqp; + xfs_dqtrx_t *qtrx, *qa; + xfs_disk_dquot_t *d; + long totalbdelta; + long totalrtbdelta; + + if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY)) + return; + + ASSERT(tp->t_dqinfo); + qa = tp->t_dqinfo->dqa_usrdquots; + for (j = 0; j < 2; j++) { + if (qa[0].qt_dquot == NULL) { + qa = tp->t_dqinfo->dqa_grpdquots; + continue; + } + + /* + * Lock all of the dquots and join them to the transaction. + */ + xfs_trans_dqlockedjoin(tp, qa); + + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + qtrx = &qa[i]; + /* + * The array of dquots is filled + * sequentially, not sparsely. + */ + if ((dqp = qtrx->qt_dquot) == NULL) + break; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); + + /* + * adjust the actual number of blocks used + */ + d = &dqp->q_core; + + /* + * The issue here is - sometimes we don't make a blkquota + * reservation intentionally to be fair to users + * (when the amount is small). On the other hand, + * delayed allocs do make reservations, but that's + * outside of a transaction, so we have no + * idea how much was really reserved. + * So, here we've accumulated delayed allocation blks and + * non-delay blks. The assumption is that the + * delayed ones are always reserved (outside of a + * transaction), and the others may or may not have + * quota reservations. + */ + totalbdelta = qtrx->qt_bcount_delta + + qtrx->qt_delbcnt_delta; + totalrtbdelta = qtrx->qt_rtbcount_delta + + qtrx->qt_delrtb_delta; +#ifdef QUOTADEBUG + if (totalbdelta < 0) + ASSERT(be64_to_cpu(d->d_bcount) >= + (xfs_qcnt_t) -totalbdelta); + + if (totalrtbdelta < 0) + ASSERT(be64_to_cpu(d->d_rtbcount) >= + (xfs_qcnt_t) -totalrtbdelta); + + if (qtrx->qt_icount_delta < 0) + ASSERT(be64_to_cpu(d->d_icount) >= + (xfs_qcnt_t) -qtrx->qt_icount_delta); +#endif + if (totalbdelta) + be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); + + if (qtrx->qt_icount_delta) + be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); + + if (totalrtbdelta) + be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); + + /* + * Get any default limits in use. + * Start/reset the timer(s) if needed. + */ + if (d->d_id) { + xfs_qm_adjust_dqlimits(tp->t_mountp, d); + xfs_qm_adjust_dqtimers(tp->t_mountp, d); + } + + dqp->dq_flags |= XFS_DQ_DIRTY; + /* + * add this to the list of items to get logged + */ + xfs_trans_log_dquot(tp, dqp); + /* + * Take off what's left of the original reservation. + * In case of delayed allocations, there's no + * reservation that a transaction structure knows of. + */ + if (qtrx->qt_blk_res != 0) { + if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { + if (qtrx->qt_blk_res > + qtrx->qt_blk_res_used) + dqp->q_res_bcount -= (xfs_qcnt_t) + (qtrx->qt_blk_res - + qtrx->qt_blk_res_used); + else + dqp->q_res_bcount -= (xfs_qcnt_t) + (qtrx->qt_blk_res_used - + qtrx->qt_blk_res); + } + } else { + /* + * These blks were never reserved, either inside + * a transaction or outside one (in a delayed + * allocation). Also, this isn't always a + * negative number since we sometimes + * deliberately skip quota reservations. + */ + if (qtrx->qt_bcount_delta) { + dqp->q_res_bcount += + (xfs_qcnt_t)qtrx->qt_bcount_delta; + } + } + /* + * Adjust the RT reservation. + */ + if (qtrx->qt_rtblk_res != 0) { + if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) { + if (qtrx->qt_rtblk_res > + qtrx->qt_rtblk_res_used) + dqp->q_res_rtbcount -= (xfs_qcnt_t) + (qtrx->qt_rtblk_res - + qtrx->qt_rtblk_res_used); + else + dqp->q_res_rtbcount -= (xfs_qcnt_t) + (qtrx->qt_rtblk_res_used - + qtrx->qt_rtblk_res); + } + } else { + if (qtrx->qt_rtbcount_delta) + dqp->q_res_rtbcount += + (xfs_qcnt_t)qtrx->qt_rtbcount_delta; + } + + /* + * Adjust the inode reservation. + */ + if (qtrx->qt_ino_res != 0) { + ASSERT(qtrx->qt_ino_res >= + qtrx->qt_ino_res_used); + if (qtrx->qt_ino_res > qtrx->qt_ino_res_used) + dqp->q_res_icount -= (xfs_qcnt_t) + (qtrx->qt_ino_res - + qtrx->qt_ino_res_used); + } else { + if (qtrx->qt_icount_delta) + dqp->q_res_icount += + (xfs_qcnt_t)qtrx->qt_icount_delta; + } + + ASSERT(dqp->q_res_bcount >= + be64_to_cpu(dqp->q_core.d_bcount)); + ASSERT(dqp->q_res_icount >= + be64_to_cpu(dqp->q_core.d_icount)); + ASSERT(dqp->q_res_rtbcount >= + be64_to_cpu(dqp->q_core.d_rtbcount)); + } + /* + * Do the group quotas next + */ + qa = tp->t_dqinfo->dqa_grpdquots; + } +} + +/* + * Release the reservations, and adjust the dquots accordingly. + * This is called only when the transaction is being aborted. If by + * any chance we have done dquot modifications incore (ie. deltas) already, + * we simply throw those away, since that's the expected behavior + * when a transaction is curtailed without a commit. + */ +STATIC void +xfs_trans_unreserve_and_mod_dquots( + xfs_trans_t *tp) +{ + int i, j; + xfs_dquot_t *dqp; + xfs_dqtrx_t *qtrx, *qa; + boolean_t locked; + + if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + return; + + qa = tp->t_dqinfo->dqa_usrdquots; + + for (j = 0; j < 2; j++) { + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + qtrx = &qa[i]; + /* + * We assume that the array of dquots is filled + * sequentially, not sparsely. + */ + if ((dqp = qtrx->qt_dquot) == NULL) + break; + /* + * Unreserve the original reservation. We don't care + * about the number of blocks used field, or deltas. + * Also we don't bother to zero the fields. + */ + locked = B_FALSE; + if (qtrx->qt_blk_res) { + xfs_dqlock(dqp); + locked = B_TRUE; + dqp->q_res_bcount -= + (xfs_qcnt_t)qtrx->qt_blk_res; + } + if (qtrx->qt_ino_res) { + if (!locked) { + xfs_dqlock(dqp); + locked = B_TRUE; + } + dqp->q_res_icount -= + (xfs_qcnt_t)qtrx->qt_ino_res; + } + + if (qtrx->qt_rtblk_res) { + if (!locked) { + xfs_dqlock(dqp); + locked = B_TRUE; + } + dqp->q_res_rtbcount -= + (xfs_qcnt_t)qtrx->qt_rtblk_res; + } + if (locked) + xfs_dqunlock(dqp); + + } + qa = tp->t_dqinfo->dqa_grpdquots; + } +} + +STATIC int +xfs_quota_error(uint flags) +{ + if (flags & XFS_QMOPT_ENOSPC) + return ENOSPC; + return EDQUOT; +} + +/* + * This reserves disk blocks and inodes against a dquot. + * Flags indicate if the dquot is to be locked here and also + * if the blk reservation is for RT or regular blocks. + * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. + */ +STATIC int +xfs_trans_dqresv( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dquot_t *dqp, + long nblks, + long ninos, + uint flags) +{ + int error; + xfs_qcnt_t hardlimit; + xfs_qcnt_t softlimit; + time_t timer; + xfs_qwarncnt_t warns; + xfs_qwarncnt_t warnlimit; + xfs_qcnt_t count; + xfs_qcnt_t *resbcountp; + xfs_quotainfo_t *q = mp->m_quotainfo; + + if (! (flags & XFS_QMOPT_DQLOCK)) { + xfs_dqlock(dqp); + } + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + if (flags & XFS_TRANS_DQ_RES_BLKS) { + hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); + if (!hardlimit) + hardlimit = q->qi_bhardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); + if (!softlimit) + softlimit = q->qi_bsoftlimit; + timer = be32_to_cpu(dqp->q_core.d_btimer); + warns = be16_to_cpu(dqp->q_core.d_bwarns); + warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount); + resbcountp = &dqp->q_res_bcount; + } else { + ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); + hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); + if (!hardlimit) + hardlimit = q->qi_rtbhardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); + if (!softlimit) + softlimit = q->qi_rtbsoftlimit; + timer = be32_to_cpu(dqp->q_core.d_rtbtimer); + warns = be16_to_cpu(dqp->q_core.d_rtbwarns); + warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); + resbcountp = &dqp->q_res_rtbcount; + } + error = 0; + + if ((flags & XFS_QMOPT_FORCE_RES) == 0 && + dqp->q_core.d_id && + ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || + (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && + (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { +#ifdef QUOTADEBUG + cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" + " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); +#endif + if (nblks > 0) { + /* + * dquot is locked already. See if we'd go over the + * hardlimit or exceed the timelimit if we allocate + * nblks. + */ + if (hardlimit > 0ULL && + (hardlimit <= nblks + *resbcountp)) { + error = xfs_quota_error(flags); + goto error_return; + } + + if (softlimit > 0ULL && + (softlimit <= nblks + *resbcountp)) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + error = xfs_quota_error(flags); + goto error_return; + } + } + } + if (ninos > 0) { + count = be64_to_cpu(dqp->q_core.d_icount); + timer = be32_to_cpu(dqp->q_core.d_itimer); + warns = be16_to_cpu(dqp->q_core.d_iwarns); + warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount); + hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); + if (!hardlimit) + hardlimit = q->qi_ihardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); + if (!softlimit) + softlimit = q->qi_isoftlimit; + if (hardlimit > 0ULL && count >= hardlimit) { + error = xfs_quota_error(flags); + goto error_return; + } else if (softlimit > 0ULL && count >= softlimit) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + error = xfs_quota_error(flags); + goto error_return; + } + } + } + } + + /* + * Change the reservation, but not the actual usage. + * Note that q_res_bcount = q_core.d_bcount + resv + */ + (*resbcountp) += (xfs_qcnt_t)nblks; + if (ninos != 0) + dqp->q_res_icount += (xfs_qcnt_t)ninos; + + /* + * note the reservation amt in the trans struct too, + * so that the transaction knows how much was reserved by + * it against this particular dquot. + * We don't do this when we are reserving for a delayed allocation, + * because we don't have the luxury of a transaction envelope then. + */ + if (tp) { + ASSERT(tp->t_dqinfo); + ASSERT(flags & XFS_QMOPT_RESBLK_MASK); + if (nblks != 0) + xfs_trans_mod_dquot(tp, dqp, + flags & XFS_QMOPT_RESBLK_MASK, + nblks); + if (ninos != 0) + xfs_trans_mod_dquot(tp, dqp, + XFS_TRANS_DQ_RES_INOS, + ninos); + } + ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount)); + ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); + ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); + +error_return: + if (! (flags & XFS_QMOPT_DQLOCK)) { + xfs_dqunlock(dqp); + } + return (error); +} + + +/* + * Given dquot(s), make disk block and/or inode reservations against them. + * The fact that this does the reservation against both the usr and + * grp/prj quotas is important, because this follows a both-or-nothing + * approach. + * + * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked. + * XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. + * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. + * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks + * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks + * dquots are unlocked on return, if they were not locked by caller. + */ +int +xfs_trans_reserve_quota_bydquots( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp, + long nblks, + long ninos, + uint flags) +{ + int resvd = 0, error; + + if (!XFS_IS_QUOTA_ON(mp)) + return 0; + + if (tp && tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + + ASSERT(flags & XFS_QMOPT_RESBLK_MASK); + + if (udqp) { + error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, + (flags & ~XFS_QMOPT_ENOSPC)); + if (error) + return error; + resvd = 1; + } + + if (gdqp) { + error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); + if (error) { + /* + * can't do it, so backout previous reservation + */ + if (resvd) { + flags |= XFS_QMOPT_FORCE_RES; + xfs_trans_dqresv(tp, mp, udqp, + -nblks, -ninos, flags); + } + return error; + } + } + + /* + * Didn't change anything critical, so, no need to log + */ + return 0; +} + + +/* + * Lock the dquot and change the reservation if we can. + * This doesn't change the actual usage, just the reservation. + * The inode sent in is locked. + */ +STATIC int +xfs_trans_reserve_quota_nblks( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_inode_t *ip, + long nblks, + long ninos, + uint flags) +{ + int error; + + if (!XFS_IS_QUOTA_ON(mp)) + return 0; + if (XFS_IS_PQUOTA_ON(mp)) + flags |= XFS_QMOPT_ENOSPC; + + ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); + ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); + ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_RTBLKS || + (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_BLKS); + + /* + * Reserve nblks against these dquots, with trans as the mediator. + */ + error = xfs_trans_reserve_quota_bydquots(tp, mp, + ip->i_udquot, ip->i_gdquot, + nblks, ninos, + flags); + return error; +} + +/* + * This routine is called to allocate a quotaoff log item. + */ +xfs_qoff_logitem_t * +xfs_trans_get_qoff_item( + xfs_trans_t *tp, + xfs_qoff_logitem_t *startqoff, + uint flags) +{ + xfs_qoff_logitem_t *q; + + ASSERT(tp != NULL); + + q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags); + ASSERT(q != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q); + + return (q); +} + + +/* + * This is called to mark the quotaoff logitem as needing + * to be logged when the transaction is committed. The logitem must + * already be associated with the given transaction. + */ +void +xfs_trans_log_quotaoff_item( + xfs_trans_t *tp, + xfs_qoff_logitem_t *qlp) +{ + xfs_log_item_desc_t *lidp; + + lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp); + ASSERT(lidp != NULL); + + tp->t_flags |= XFS_TRANS_DIRTY; + lidp->lid_flags |= XFS_LID_DIRTY; +} + +STATIC void +xfs_trans_alloc_dqinfo( + xfs_trans_t *tp) +{ + (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); +} + +STATIC void +xfs_trans_free_dqinfo( + xfs_trans_t *tp) +{ + if (!tp->t_dqinfo) + return; + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo); + (tp)->t_dqinfo = NULL; +} + +xfs_dqtrxops_t xfs_trans_dquot_ops = { + .qo_dup_dqinfo = xfs_trans_dup_dqinfo, + .qo_free_dqinfo = xfs_trans_free_dqinfo, + .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino, + .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas, + .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks, + .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots, + .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots, +}; |