Diffstat (limited to 'fs')
-rw-r--r-- | fs/bio.c                 |  10
-rw-r--r-- | fs/ceph/caps.c           |   4
-rw-r--r-- | fs/ceph/dir.c            |   4
-rw-r--r-- | fs/ceph/mds_client.c     |  10
-rw-r--r-- | fs/ceph/mds_client.h     |   7
-rw-r--r-- | fs/ceph/xattr.c          |   4
-rw-r--r-- | fs/cifs/Kconfig          |   4
-rw-r--r-- | fs/cifs/connect.c        |  23
-rw-r--r-- | fs/cifs/dir.c            |   2
-rw-r--r-- | fs/cifs/sess.c           |  11
-rw-r--r-- | fs/exec.c                |  33
-rw-r--r-- | fs/fs-writeback.c        |  16
-rw-r--r-- | fs/ioprio.c              |   2
-rw-r--r-- | fs/jffs2/erase.c         |   2
-rw-r--r-- | fs/logfs/dev_mtd.c       |   6
-rw-r--r-- | fs/nilfs2/ioctl.c        |   2
-rw-r--r-- | fs/proc/base.c           | 126
-rw-r--r-- | fs/xfs/kmem.h            |   6
-rw-r--r-- | fs/xfs/xfs_dquot.c       | 103
-rw-r--r-- | fs/xfs/xfs_log_recover.c |   2
-rw-r--r-- | fs/xfs/xfs_qm.c          | 291
-rw-r--r-- | fs/xfs/xfs_qm.h          |  14
-rw-r--r-- | fs/xfs/xfs_qm_stats.c    |   4
-rw-r--r-- | fs/xfs/xfs_trace.h       |   5
24 files changed, 260 insertions, 431 deletions
@@ -505,13 +505,9 @@ EXPORT_SYMBOL(bio_clone);
 int bio_get_nr_vecs(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	int nr_pages;
-
-	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages > queue_max_segments(q))
-		nr_pages = queue_max_segments(q);
-
-	return nr_pages;
+	return min_t(unsigned,
+		     queue_max_segments(q),
+		     queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
 }
 
 EXPORT_SYMBOL(bio_get_nr_vecs);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b60fc8bf..620daad 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -641,10 +641,10 @@ static int __cap_is_valid(struct ceph_cap *cap)
 	unsigned long ttl;
 	u32 gen;
 
-	spin_lock(&cap->session->s_cap_lock);
+	spin_lock(&cap->session->s_gen_ttl_lock);
 	gen = cap->session->s_cap_gen;
 	ttl = cap->session->s_cap_ttl;
-	spin_unlock(&cap->session->s_cap_lock);
+	spin_unlock(&cap->session->s_gen_ttl_lock);
 
 	if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
 		dout("__cap_is_valid %p cap %p issued %s "
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 618246b..3e8094b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -975,10 +975,10 @@ static int dentry_lease_is_valid(struct dentry *dentry)
 	di = ceph_dentry(dentry);
 	if (di->lease_session) {
 		s = di->lease_session;
-		spin_lock(&s->s_cap_lock);
+		spin_lock(&s->s_gen_ttl_lock);
 		gen = s->s_cap_gen;
 		ttl = s->s_cap_ttl;
-		spin_unlock(&s->s_cap_lock);
+		spin_unlock(&s->s_gen_ttl_lock);
 
 		if (di->lease_gen == gen &&
 		    time_before(jiffies, dentry->d_time) &&
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 23ab6a3..866e8d7 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -262,6 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg,
 	/* trace */
 	ceph_decode_32_safe(&p, end, len, bad);
 	if (len > 0) {
+		ceph_decode_need(&p, end, len, bad);
 		err = parse_reply_info_trace(&p, p+len, info, features);
 		if (err < 0)
 			goto out_bad;
@@ -270,6 +271,7 @@ static int parse_reply_info(struct ceph_msg *msg,
 	/* extra */
 	ceph_decode_32_safe(&p, end, len, bad);
 	if (len > 0) {
+		ceph_decode_need(&p, end, len, bad);
 		err = parse_reply_info_extra(&p, p+len, info, features);
 		if (err < 0)
 			goto out_bad;
@@ -398,9 +400,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
 	s->s_con.peer_name.num = cpu_to_le64(mds);
 
-	spin_lock_init(&s->s_cap_lock);
+	spin_lock_init(&s->s_gen_ttl_lock);
 	s->s_cap_gen = 0;
 	s->s_cap_ttl = 0;
+
+	spin_lock_init(&s->s_cap_lock);
 	s->s_renew_requested = 0;
 	s->s_renew_seq = 0;
 	INIT_LIST_HEAD(&s->s_caps);
@@ -2326,10 +2330,10 @@ static void handle_session(struct ceph_mds_session *session,
 	case CEPH_SESSION_STALE:
 		pr_info("mds%d caps went stale, renewing\n",
 			session->s_mds);
-		spin_lock(&session->s_cap_lock);
+		spin_lock(&session->s_gen_ttl_lock);
 		session->s_cap_gen++;
 		session->s_cap_ttl = 0;
-		spin_unlock(&session->s_cap_lock);
+		spin_unlock(&session->s_gen_ttl_lock);
 		send_renew_caps(mdsc, session);
 		break;
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index a50ca0e..8c7c04e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -117,10 +117,13 @@ struct ceph_mds_session {
 	void             *s_authorizer_buf, *s_authorizer_reply_buf;
 	size_t            s_authorizer_buf_len, s_authorizer_reply_buf_len;
 
-	/* protected by s_cap_lock */
-	spinlock_t        s_cap_lock;
+	/* protected by s_gen_ttl_lock */
+	spinlock_t        s_gen_ttl_lock;
 	u32               s_cap_gen;  /* inc each time we get mds stale msg */
 	unsigned long     s_cap_ttl;  /* when session caps expire */
+
+	/* protected by s_cap_lock */
+	spinlock_t        s_cap_lock;
 	struct list_head  s_caps;     /* all caps issued by this session */
 	int               s_nr_caps, s_trim_caps;
 	int               s_num_cap_releases;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 857214a..a76f697 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -111,8 +111,10 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
 }
 
 static struct ceph_vxattr_cb ceph_file_vxattrs[] = {
+	{ true, "ceph.file.layout", ceph_vxattrcb_layout},
+	/* The following extended attribute name is deprecated */
 	{ true, "ceph.layout", ceph_vxattrcb_layout},
-	{ NULL, NULL }
+	{ true, NULL, NULL }
 };
 
 static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0554b00..2b243af 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -139,7 +139,7 @@ config CIFS_DFS_UPCALL
 	  points. If unsure, say N.
 
 config CIFS_FSCACHE
-	bool "Provide CIFS client caching support (EXPERIMENTAL)"
+	bool "Provide CIFS client caching support"
 	depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
 	help
 	  Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
@@ -147,7 +147,7 @@ config CIFS_FSCACHE
 	  manager. If unsure, say N.
 
 config CIFS_ACL
-	bool "Provide CIFS ACL support (EXPERIMENTAL)"
+	bool "Provide CIFS ACL support"
 	depends on CIFS_XATTR && KEYS
 	help
 	  Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 986709a..602f77c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,10 +773,11 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		cifs_dump_mem("Bad SMB: ", buf,
 			min_t(unsigned int, server->total_read, 48));
 
-	if (mid)
-		handle_mid(mid, server, smb_buffer, length);
+	if (!mid)
+		return length;
 
-	return length;
+	handle_mid(mid, server, smb_buffer, length);
+	return 0;
 }
 
 static int
@@ -2125,7 +2126,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
 	down_read(&key->sem);
 	upayload = key->payload.data;
 	if (IS_ERR_OR_NULL(upayload)) {
-		rc = PTR_ERR(key);
+		rc = upayload ?
PTR_ERR(upayload) : -EINVAL; goto out_key_put; } @@ -2142,14 +2143,14 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) len = delim - payload; if (len > MAX_USERNAME_SIZE || len <= 0) { - cFYI(1, "Bad value from username search (len=%ld)", len); + cFYI(1, "Bad value from username search (len=%zd)", len); rc = -EINVAL; goto out_key_put; } vol->username = kstrndup(payload, len, GFP_KERNEL); if (!vol->username) { - cFYI(1, "Unable to allocate %ld bytes for username", len); + cFYI(1, "Unable to allocate %zd bytes for username", len); rc = -ENOMEM; goto out_key_put; } @@ -2157,7 +2158,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) len = key->datalen - (len + 1); if (len > MAX_PASSWORD_SIZE || len <= 0) { - cFYI(1, "Bad len for password search (len=%ld)", len); + cFYI(1, "Bad len for password search (len=%zd)", len); rc = -EINVAL; kfree(vol->username); vol->username = NULL; @@ -2167,7 +2168,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) ++delim; vol->password = kstrndup(delim, len, GFP_KERNEL); if (!vol->password) { - cFYI(1, "Unable to allocate %ld bytes for password", len); + cFYI(1, "Unable to allocate %zd bytes for password", len); rc = -ENOMEM; kfree(vol->username); vol->username = NULL; @@ -3857,10 +3858,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) struct smb_vol *vol_info; vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); - if (vol_info == NULL) { - tcon = ERR_PTR(-ENOMEM); - goto out; - } + if (vol_info == NULL) + return ERR_PTR(-ENOMEM); vol_info->local_nls = cifs_sb->local_nls; vol_info->linux_uid = fsuid; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index df8fecb..63a196b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock = 0; + __u32 oplock = enable_oplocks ? 
REQ_OPLOCK : 0; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d85efad..551d0c2 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -246,16 +246,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy user */ /* BB what about null user mounts - check that we do this BB */ /* copy user */ - if (ses->user_name != NULL) + if (ses->user_name != NULL) { strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); + bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); + } /* else null user mount */ - - bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); *bcc_ptr = 0; bcc_ptr++; /* account for null termination */ /* copy domain */ - if (ses->domainName != NULL) { strncpy(bcc_ptr, ses->domainName, 256); bcc_ptr += strnlen(ses->domainName, 256); @@ -395,6 +394,10 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); + if (tioffset > blob_len || tioffset + tilen > blob_len) { + cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen); + return -EINVAL; + } if (tilen) { ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); if (!ses->auth_key.response) { @@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } +static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) +{ + int i, ch; + + /* Copies the binary name from after last slash */ + for (i = 0; (ch = *(fn++)) != '\0';) { + if (ch == '/') + i = 0; /* overwrite what we wrote */ + else + if (i < len - 1) + tcomm[i++] = ch; + } + tcomm[i] = '\0'; +} + int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm) set_mm_exe_file(bprm->mm, bprm->file); + filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { - int i, ch; - const char *name; - char tcomm[sizeof(current->comm)]; - arch_pick_mmap_layout(current->mm); /* This is the point of no return */ @@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - name = bprm->filename; - - /* Copies the binary name from after last slash */ - for (i=0; (ch = *(name++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < (sizeof(tcomm) - 1)) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_task_comm(current, bprm->tcomm); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f855916..5b4a936 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,14 +53,6 @@ struct wb_writeback_work { }; /* - * Include the creation of the trace points after defining the - * wb_writeback_work structure so that the definition remains local to this - * file. - */ -#define CREATE_TRACE_POINTS -#include <trace/events/writeback.h> - -/* * We don't actually have pdflush, but this one is exported though /proc... 
*/ int nr_pdflush_threads; @@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head) return list_entry(head, struct inode, i_wb_list); } +/* + * Include the creation of the trace points after defining the + * wb_writeback_work structure and inline functions so that the definition + * remains local to this file. + */ +#define CREATE_TRACE_POINTS +#include <trace/events/writeback.h> + /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ static void bdi_wakeup_flusher(struct backing_dev_info *bdi) { diff --git a/fs/ioprio.c b/fs/ioprio.c index f84b380..0f1b951 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_ioprio_changed(ioc, ioprio); - put_io_context(ioc, NULL); + put_io_context(ioc); } return err; diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index a01cdad..eafb8d3 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -335,7 +335,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl void *ebuf; uint32_t ofs; size_t retlen; - int ret = -EIO; + int ret; unsigned long *wordebuf; ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index e97404d..9c50144 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = 0; while (mtd_block_isbad(mtd, *ofs)) { *ofs += mtd->erasesize; @@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = mtd->size - mtd->erasesize; while (mtd_block_isbad(mtd, *ofs)) { *ofs -= mtd->erasesize; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 8866496..2a70fce 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -603,6 +603,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; + if (nsegs > UINT_MAX / sizeof(__u64)) + goto out; /* * argv[4] points to segment numbers this ioctl cleans. 
We diff --git a/fs/proc/base.c b/fs/proc/base.c index 9cde9edf..d4548dd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) -{ - struct mm_struct *mm; - int err; - - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { - mmput(mm); - mm = ERR_PTR(-EACCES); - } - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { return mm_access(task, PTRACE_MODE_READ); @@ -711,6 +691,13 @@ static int mem_open(struct inode* inode, struct file* file) if (IS_ERR(mm)) return PTR_ERR(mm); + if (mm) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + /* OK to pass negative loff_t, we can catch out-of-range */ file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; @@ -718,57 +705,13 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } -static ssize_t mem_read(struct file * file, char __user * buf, - size_t count, loff_t *ppos) +static ssize_t mem_rw(struct file *file, char __user *buf, + size_t count, loff_t *ppos, int write) { - int ret; - char *page; - unsigned long src = *ppos; struct mm_struct *mm = file->private_data; - - if (!mm) - return 0; - - page = (char *)__get_free_page(GFP_TEMPORARY); - if (!page) - return -ENOMEM; - - ret = 0; - - while (count > 0) { - int this_len, retval; - - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_remote_vm(mm, src, page, this_len, 0); - if (!retval) { - if (!ret) - ret = -EIO; - break; - } - - if (copy_to_user(buf, page, retval)) { - ret = -EFAULT; - break; - } - - ret += retval; - src += retval; - buf += retval; - count -= retval; - } - *ppos = src; - - free_page((unsigned long) page); - return ret; -} - -static ssize_t mem_write(struct file * file, const char __user *buf, - size_t count, loff_t *ppos) -{ - int copied; + unsigned long addr = *ppos; + ssize_t copied; char *page; - unsigned long dst = *ppos; - struct mm_struct *mm = file->private_data; if (!mm) return 0; @@ -778,31 +721,54 @@ static ssize_t mem_write(struct file * file, const char __user *buf, return -ENOMEM; copied = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; + while (count > 0) { - int this_len, retval; + int this_len = min_t(int, count, PAGE_SIZE); - this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; - if (copy_from_user(page, buf, this_len)) { + if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } - retval = access_remote_vm(mm, dst, page, this_len, 1); - if (!retval) { + + this_len = access_remote_vm(mm, addr, page, this_len, write); + if (!this_len) { if (!copied) copied = -EIO; break; } - copied += retval; - buf += retval; - dst += retval; - count -= retval; + + if (!write && copy_to_user(buf, page, this_len)) { + copied = -EFAULT; + break; + } + + buf += this_len; + addr += this_len; + copied += this_len; + count -= this_len; } - *ppos = dst; + *ppos = addr; + mmput(mm); +free: free_page((unsigned long) page); return copied; } +static ssize_t mem_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, buf, count, ppos, 0); +} + +static ssize_t mem_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, (char __user*)buf, count, ppos, 1); +} + loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { @@ -822,8 +788,8 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; - - mmput(mm); + if (mm) + mmdrop(mm); return 0; } diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 292eff1..ab7c53f 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone) extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -static inline int -kmem_shake_allow(gfp_t gfp_mask) -{ - return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); -} - #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4ff40b..cbcb7be 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33; static struct lock_class_key xfs_dquot_other_class; /* - * Allocate and initialize a dquot. We don't always allocate fresh memory; - * we try to reclaim a free dquot if the number of incore dquots are above - * a threshold. - * The only field inside the core that gets initialized at this point - * is the d_id field. The idea is to fill in the entire q_core - * when we read in the on disk dquot. - */ -STATIC xfs_dquot_t * -xfs_qm_dqinit( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type) -{ - xfs_dquot_t *dqp; - boolean_t brandnewdquot; - - brandnewdquot = xfs_qm_dqalloc_incore(&dqp); - dqp->dq_flags = type; - dqp->q_core.d_id = cpu_to_be32(id); - dqp->q_mount = mp; - - /* - * No need to re-initialize these if this is a reclaimed dquot. - */ - if (brandnewdquot) { - INIT_LIST_HEAD(&dqp->q_freelist); - mutex_init(&dqp->q_qlock); - init_waitqueue_head(&dqp->q_pinwait); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&dqp->q_flush); - complete(&dqp->q_flush); - - trace_xfs_dqinit(dqp); - } else { - /* - * Only the q_core portion was zeroed in dqreclaim_one(). - * So, we need to reset others. 
- */ - dqp->q_nrefs = 0; - dqp->q_blkno = 0; - INIT_LIST_HEAD(&dqp->q_mplist); - INIT_LIST_HEAD(&dqp->q_hashlist); - dqp->q_bufoffset = 0; - dqp->q_fileoffset = 0; - dqp->q_transp = NULL; - dqp->q_gdquot = NULL; - dqp->q_res_bcount = 0; - dqp->q_res_icount = 0; - dqp->q_res_rtbcount = 0; - atomic_set(&dqp->q_pincount, 0); - dqp->q_hash = NULL; - ASSERT(list_empty(&dqp->q_freelist)); - - trace_xfs_dqreuse(dqp); - } - - /* - * In either case we need to make sure group quotas have a different - * lock class than user quotas, to make sure lockdep knows we can - * locks of one of each at the same time. - */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); - - /* - * log item gets initialized later - */ - return (dqp); -} - -/* * This is called to free all the memory associated with a dquot */ void @@ -567,7 +491,32 @@ xfs_qm_dqread( int error; int cancelflags = 0; - dqp = xfs_qm_dqinit(mp, id, type); + + dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); + + dqp->dq_flags = type; + dqp->q_core.d_id = cpu_to_be32(id); + dqp->q_mount = mp; + INIT_LIST_HEAD(&dqp->q_freelist); + mutex_init(&dqp->q_qlock); + init_waitqueue_head(&dqp->q_pinwait); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + + /* + * Make sure group quotas have a different lock class than user + * quotas. + */ + if (!(type & XFS_DQ_USER)) + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + + atomic_inc(&xfs_Gqm->qm_totaldquots); trace_xfs_dqread(dqp); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 541a508..15ff539 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); + ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 671f37e..c436def 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -50,7 +50,6 @@ */ struct mutex xfs_Gqm_lock; struct xfs_qm *xfs_Gqm; -uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; @@ -93,7 +92,6 @@ xfs_Gqm_init(void) goto out_free_udqhash; hsize /= sizeof(xfs_dqhash_t); - ndquot = hsize << 8; xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); xqm->qm_dqhashmask = hsize - 1; @@ -137,7 +135,6 @@ xfs_Gqm_init(void) xqm->qm_dqtrxzone = qm_dqtrxzone; atomic_set(&xqm->qm_totaldquots, 0); - xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; return xqm; @@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos( return 0; } +STATIC void +xfs_qm_dqfree_one( + struct xfs_dquot *dqp) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_quotainfo *qi = mp->m_quotainfo; + mutex_lock(&dqp->q_hash->qh_lock); + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; + mutex_unlock(&dqp->q_hash->qh_lock); -/* - * Pop the least recently used dquot off the freelist and recycle it. 
- */ -STATIC struct xfs_dquot * -xfs_qm_dqreclaim_one(void) + mutex_lock(&qi->qi_dqlist_lock); + list_del_init(&dqp->q_mplist); + qi->qi_dquots--; + qi->qi_dqreclaims++; + mutex_unlock(&qi->qi_dqlist_lock); + + xfs_qm_dqdestroy(dqp); +} + +STATIC void +xfs_qm_dqreclaim_one( + struct xfs_dquot *dqp, + struct list_head *dispose_list) { - struct xfs_dquot *dqp; - int restarts = 0; + struct xfs_mount *mp = dqp->q_mount; + int error; - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); -restart: - list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { - struct xfs_mount *mp = dqp->q_mount; + if (!xfs_dqlock_nowait(dqp)) + goto out_busy; - if (!xfs_dqlock_nowait(dqp)) - continue; + /* + * This dquot has acquired a reference in the meantime remove it from + * the freelist and try again. + */ + if (dqp->q_nrefs) { + xfs_dqunlock(dqp); - /* - * This dquot has already been grabbed by dqlookup. - * Remove it from the freelist and try again. - */ - if (dqp->q_nrefs) { - trace_xfs_dqreclaim_want(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - restarts++; - goto dqunlock; - } + trace_xfs_dqreclaim_want(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqwants); - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + return; + } - /* - * Try to grab the flush lock. If this dquot is in the process - * of getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto dqunlock; + ASSERT(dqp->q_hash); + ASSERT(!list_empty(&dqp->q_mplist)); - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; + /* + * Try to grab the flush lock. If this dquot is in the process of + * getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) + goto out_busy; - trace_xfs_dqreclaim_dirty(dqp); + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + trace_xfs_dqreclaim_dirty(dqp); - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - } - goto dqunlock; + /* + * We flush it delayed write, so don't bother releasing the + * freelist lock. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) { + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); } - xfs_dqfunlock(dqp); /* - * Prevent lookup now that we are going to reclaim the dquot. - * Once XFS_DQ_FREEING is set lookup won't touch the dquot, - * thus we can drop the lock now. + * Give the dquot another try on the freelist, as the + * flushing will take some time. 
*/ - dqp->dq_flags |= XFS_DQ_FREEING; - xfs_dqunlock(dqp); - - mutex_lock(&dqp->q_hash->qh_lock); - list_del_init(&dqp->q_hashlist); - dqp->q_hash->qh_version++; - mutex_unlock(&dqp->q_hash->qh_lock); - - mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + goto out_busy; + } + xfs_dqfunlock(dqp); - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; + /* + * Prevent lookups now that we are past the point of no return. + */ + dqp->dq_flags |= XFS_DQ_FREEING; + xfs_dqunlock(dqp); - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return dqp; -dqunlock: - xfs_dqunlock(dqp); - if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - break; - goto restart; - } + ASSERT(dqp->q_nrefs == 0); + list_move_tail(&dqp->q_freelist, dispose_list); + xfs_Gqm->qm_dqfrlist_cnt--; - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return NULL; -} + trace_xfs_dqreclaim_done(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); + return; -/* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - */ -STATIC int -xfs_qm_shake_freelist( - int howmany) -{ - int nreclaimed = 0; - xfs_dquot_t *dqp; +out_busy: + xfs_dqunlock(dqp); - if (howmany <= 0) - return 0; + /* + * Move the dquot to the tail of the list so that we don't spin on it. + */ + list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); - while (nreclaimed < howmany) { - dqp = xfs_qm_dqreclaim_one(); - if (!dqp) - return nreclaimed; - xfs_qm_dqdestroy(dqp); - nreclaimed++; - } - return nreclaimed; + trace_xfs_dqreclaim_busy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); } -/* - * The kmem_shake interface is invoked when memory is running low. - */ -/* ARGSUSED */ STATIC int xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) + struct shrinker *shrink, + struct shrink_control *sc) { - int ndqused, nfree, n; - gfp_t gfp_mask = sc->gfp_mask; - - if (!kmem_shake_allow(gfp_mask)) - return 0; - if (!xfs_Gqm) - return 0; - - nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ - /* incore dquots in all f/s's */ - ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; - - ASSERT(ndqused >= 0); + int nr_to_scan = sc->nr_to_scan; + LIST_HEAD (dispose_list); + struct xfs_dquot *dqp; - if (nfree <= ndqused && nfree < ndquot) + if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) return 0; + if (!nr_to_scan) + goto out; - ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ - n = nfree - ndqused - ndquot; /* # over target */ - - return xfs_qm_shake_freelist(MAX(nfree, n)); -} - - -/*------------------------------------------------------------------*/ - -/* - * Return a new incore dquot. Depending on the number of - * dquots in the system, we either allocate a new one on the kernel heap, - * or reclaim a free one. - * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed - * to reclaim an existing one from the freelist. - */ -boolean_t -xfs_qm_dqalloc_incore( - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - - /* - * Check against high water mark to see if we want to pop - * a nincompoop dquot off the freelist. - */ - if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { - /* - * Try to recycle a dquot from the freelist. 
- */ - if ((dqp = xfs_qm_dqreclaim_one())) { - XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); - /* - * Just zero the core here. The rest will get - * reinitialized by caller. XXX we shouldn't even - * do this zero ... - */ - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - *O_dqpp = dqp; - return B_FALSE; - } - XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { + if (nr_to_scan-- <= 0) + break; + dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, + q_freelist); + xfs_qm_dqreclaim_one(dqp, &dispose_list); } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - /* - * Allocate a brand new dquot on the kernel heap and return it - * to the caller to initialize. - */ - ASSERT(xfs_Gqm->qm_dqzone != NULL); - *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); - atomic_inc(&xfs_Gqm->qm_totaldquots); - - return B_TRUE; + while (!list_empty(&dispose_list)) { + dqp = list_first_entry(&dispose_list, struct xfs_dquot, + q_freelist); + list_del_init(&dqp->q_freelist); + xfs_qm_dqfree_one(dqp); + } +out: + return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; } - /* * Start a transaction and write the incore superblock changes to * disk. flags parameter indicates which fields have changed. diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9b4f3ad..9a9b997 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -26,24 +26,12 @@ struct xfs_qm; struct xfs_inode; -extern uint ndquot; extern struct mutex xfs_Gqm_lock; extern struct xfs_qm *xfs_Gqm; extern kmem_zone_t *qm_dqzone; extern kmem_zone_t *qm_dqtrxzone; /* - * Ditto, for xfs_qm_dqreclaim_one. - */ -#define XFS_QM_RECLAIM_MAX_RESTARTS 4 - -/* - * Ideal ratio of free to in use dquots. Quota manager makes an attempt - * to keep this balance. - */ -#define XFS_QM_DQFREE_RATIO 2 - -/* * Dquot hashtable constants/threshold values. */ #define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) @@ -74,7 +62,6 @@ typedef struct xfs_qm { int qm_dqfrlist_cnt; atomic_t qm_totaldquots; /* total incore dquots */ uint qm_nrefs; /* file systems with quota on */ - int qm_dqfree_ratio;/* ratio of free to inuse dquots */ kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ } xfs_qm_t; @@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); /* dquot stuff */ -extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c index 8671a0b..5729ba5 100644 --- a/fs/xfs/xfs_qm_stats.c +++ b/fs/xfs/xfs_qm_stats.c @@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v) { /* maximum; incore; ratio free to inuse; freelist */ seq_printf(m, "%d\t%d\t%d\t%u\n", - ndquot, + 0, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, - xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, + 0, xfs_Gqm? 
 			xfs_Gqm->qm_dqfrlist_cnt : 0);
 	return 0;
 }
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6b6df58..bb134a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
 DEFINE_DQUOT_EVENT(xfs_dqadjust);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
 DEFINE_DQUOT_EVENT(xfs_dqattach_found);
 DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
 DEFINE_DQUOT_EVENT(xfs_dqalloc);
 DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
 DEFINE_DQUOT_EVENT(xfs_dqread);