diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 47 | ||||
-rw-r--r-- | fs/proc/base.c | 3 | ||||
-rw-r--r-- | fs/proc/generic.c | 163 | ||||
-rw-r--r-- | fs/proc/internal.h | 11 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 1 | ||||
-rw-r--r-- | fs/proc/root.c | 1 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 107 |
7 files changed, 208 insertions, 125 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index cd3653e..bd117d0 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -157,20 +157,29 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; int g; - struct fdtable *fdt = NULL; + struct task_struct *tracer; const struct cred *cred; - pid_t ppid, tpid; + pid_t ppid, tpid = 0, tgid, ngid; + unsigned int max_fds = 0; rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; - tpid = 0; - if (pid_alive(p)) { - struct task_struct *tracer = ptrace_parent(p); - if (tracer) - tpid = task_pid_nr_ns(tracer, ns); - } + + tracer = ptrace_parent(p); + if (tracer) + tpid = task_pid_nr_ns(tracer, ns); + + tgid = task_tgid_nr_ns(p, ns); + ngid = task_numa_group_id(p); cred = get_task_cred(p); + + task_lock(p); + if (p->files) + max_fds = files_fdtable(p->files)->max_fds; + task_unlock(p); + rcu_read_unlock(); + seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" @@ -179,12 +188,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n", + "Gid:\t%d\t%d\t%d\t%d\n" + "FDSize:\t%d\nGroups:\t", get_task_state(p), - task_tgid_nr_ns(p, ns), - task_numa_group_id(p), - pid_nr_ns(pid, ns), - ppid, tpid, + tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid, from_kuid_munged(user_ns, cred->uid), from_kuid_munged(user_ns, cred->euid), from_kuid_munged(user_ns, cred->suid), @@ -192,20 +199,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, from_kgid_munged(user_ns, cred->gid), from_kgid_munged(user_ns, cred->egid), from_kgid_munged(user_ns, cred->sgid), - from_kgid_munged(user_ns, cred->fsgid)); - - task_lock(p); - if (p->files) - fdt = files_fdtable(p->files); - seq_printf(m, - "FDSize:\t%d\n" - "Groups:\t", - fdt ? fdt->max_fds : 0); - rcu_read_unlock(); + from_kgid_munged(user_ns, cred->fsgid), + max_fds); group_info = cred->group_info; - task_unlock(p); - for (g = 0; g < group_info->ngroups; g++) seq_printf(m, "%d ", from_kgid_munged(user_ns, GROUP_AT(group_info, g))); diff --git a/fs/proc/base.c b/fs/proc/base.c index 64891f3..590aeda 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2618,6 +2618,9 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) dput(dentry); } + if (pid == tgid) + return; + name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 317b726..7fea132 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -31,9 +31,73 @@ static DEFINE_SPINLOCK(proc_subdir_lock); static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) { - if (de->namelen != len) - return 0; - return !memcmp(name, de->name, len); + if (len < de->namelen) + return -1; + if (len > de->namelen) + return 1; + + return memcmp(name, de->name, len); +} + +static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) +{ + return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, + subdir_node); +} + +static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) +{ + return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry, + subdir_node); +} + +static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, + const char *name, + unsigned int len) +{ + struct rb_node *node = dir->subdir.rb_node; + + while (node) { + struct proc_dir_entry *de = container_of(node, + struct proc_dir_entry, + subdir_node); + int result = proc_match(len, name, de); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return de; + } + return NULL; +} + +static bool pde_subdir_insert(struct proc_dir_entry *dir, + struct proc_dir_entry *de) +{ + struct rb_root *root = &dir->subdir; + struct rb_node **new = &root->rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct proc_dir_entry *this = + container_of(*new, struct proc_dir_entry, subdir_node); + int result = proc_match(de->namelen, de->name, this); + + parent = *new; + if (result < 0) + new = &(*new)->rb_left; + else if (result > 0) + new = &(*new)->rb_right; + else + return false; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&de->subdir_node, parent, new); + rb_insert_color(&de->subdir_node, root); + return true; } static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) @@ -92,10 +156,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, break; len = next - cp; - for (de = de->subdir; de ; de = de->next) { - if (proc_match(len, cp, de)) - break; - } + de = pde_subdir_find(de, cp, len); if (!de) { WARN(1, "name '%s'\n", name); return -ENOENT; @@ -183,19 +244,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct inode *inode; spin_lock(&proc_subdir_lock); - for (de = de->subdir; de ; de = de->next) { - if (de->namelen != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - pde_get(de); - spin_unlock(&proc_subdir_lock); - inode = proc_get_inode(dir->i_sb, de); - if (!inode) - return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &simple_dentry_operations); - d_add(dentry, inode); - return NULL; - } + de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); + if (de) { + pde_get(de); + spin_unlock(&proc_subdir_lock); + inode = proc_get_inode(dir->i_sb, de); + if (!inode) + return ERR_PTR(-ENOMEM); + d_set_d_op(dentry, &simple_dentry_operations); + d_add(dentry, inode); + return NULL; } spin_unlock(&proc_subdir_lock); return ERR_PTR(-ENOENT); @@ -225,7 +283,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, return 0; spin_lock(&proc_subdir_lock); - de = de->subdir; + de = pde_subdir_first(de); i = ctx->pos - 2; for (;;) { if (!de) { @@ -234,7 +292,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, } if (!i) break; - de = de->next; + de = pde_subdir_next(de); i--; } @@ -249,7 +307,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, } spin_lock(&proc_subdir_lock); ctx->pos++; - next = de->next; + next = pde_subdir_next(de); pde_put(de); de = next; } while (de); @@ -286,9 +344,8 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { - struct proc_dir_entry *tmp; int ret; - + ret = proc_alloc_inum(&dp->low_ino); if (ret) return ret; @@ -304,21 +361,21 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp dp->proc_iops = &proc_file_inode_operations; } else { WARN_ON(1); + proc_free_inum(dp->low_ino); return -EINVAL; } spin_lock(&proc_subdir_lock); - - for (tmp = dir->subdir; tmp; tmp = tmp->next) - if (strcmp(tmp->name, dp->name) == 0) { - WARN(1, "proc_dir_entry '%s/%s' already registered\n", - dir->name, dp->name); - break; - } - - dp->next = dir->subdir; dp->parent = dir; - dir->subdir = dp; + if (pde_subdir_insert(dir, dp) == false) { + WARN(1, "proc_dir_entry '%s/%s' already registered\n", + dir->name, dp->name); + spin_unlock(&proc_subdir_lock); + if (S_ISDIR(dp->mode)) + dir->nlink--; + proc_free_inum(dp->low_ino); + return -EEXIST; + } spin_unlock(&proc_subdir_lock); return 0; @@ -354,6 +411,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->namelen = qstr.len; ent->mode = mode; ent->nlink = nlink; + ent->subdir = RB_ROOT; atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); @@ -485,7 +543,6 @@ void pde_put(struct proc_dir_entry *pde) */ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { - struct proc_dir_entry **p; struct proc_dir_entry *de = NULL; const char *fn = name; unsigned int len; @@ -497,14 +554,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) } len = strlen(fn); - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - de = *p; - *p = de->next; - de->next = NULL; - break; - } - } + de = pde_subdir_find(parent, fn, len); + if (de) + rb_erase(&de->subdir_node, &parent->subdir); spin_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -516,16 +568,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - WARN(de->subdir, "%s: removing non-empty directory " - "'%s/%s', leaking at least '%s'\n", __func__, - de->parent->name, de->name, de->subdir->name); + WARN(pde_subdir_first(de), + "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", + __func__, de->parent->name, de->name, pde_subdir_first(de)->name); pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { - struct proc_dir_entry **p; struct proc_dir_entry *root = NULL, *de, *next; const char *fn = name; unsigned int len; @@ -537,24 +588,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) } len = strlen(fn); - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - root = *p; - *p = root->next; - root->next = NULL; - break; - } - } + root = pde_subdir_find(parent, fn, len); if (!root) { spin_unlock(&proc_subdir_lock); return -ENOENT; } + rb_erase(&root->subdir_node, &parent->subdir); + de = root; while (1) { - next = de->subdir; + next = pde_subdir_first(de); if (next) { - de->subdir = next->next; - next->next = NULL; + rb_erase(&next->subdir_node, &de->subdir); de = next; continue; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d689fd6..6fcdba5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -24,10 +24,9 @@ struct mempolicy; * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * - * The "next" pointer creates a linked list of one /proc directory, - * while parent/subdir create the directory structure (every - * /proc file has a parent, but "subdir" is NULL for all - * non-directory entries). + * parent/subdir are used for the directory structure (every /proc file has a + * parent, but "subdir" is empty for all non-directory entries). + * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { unsigned int low_ino; @@ -38,7 +37,9 @@ struct proc_dir_entry { loff_t size; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; - struct proc_dir_entry *next, *parent, *subdir; + struct proc_dir_entry *parent; + struct rb_root subdir; + struct rb_node subdir_node; void *data; atomic_t count; /* use count */ atomic_t in_use; /* number of callers into module in progress; */ diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a63af3e..1bde894 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net) if (!netd) goto out; + netd->subdir = RB_ROOT; netd->data = net; netd->nlink = 2; netd->namelen = 3; diff --git a/fs/proc/root.c b/fs/proc/root.c index 094e44d..e74ac9f 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = { .proc_iops = &proc_root_inode_operations, .proc_fops = &proc_root_operations, .parent = &proc_root, + .subdir = RB_ROOT, .name = "/proc", }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4e0388c..246eae8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -447,59 +447,92 @@ struct mem_size_stats { u64 pss; }; +static void smaps_account(struct mem_size_stats *mss, struct page *page, + unsigned long size, bool young, bool dirty) +{ + int mapcount; + + if (PageAnon(page)) + mss->anonymous += size; -static void smaps_pte_entry(pte_t ptent, unsigned long addr, - unsigned long ptent_size, struct mm_walk *walk) + mss->resident += size; + /* Accumulate the size in pages that have been accessed. */ + if (young || PageReferenced(page)) + mss->referenced += size; + mapcount = page_mapcount(page); + if (mapcount >= 2) { + u64 pss_delta; + + if (dirty || PageDirty(page)) + mss->shared_dirty += size; + else + mss->shared_clean += size; + pss_delta = (u64)size << PSS_SHIFT; + do_div(pss_delta, mapcount); + mss->pss += pss_delta; + } else { + if (dirty || PageDirty(page)) + mss->private_dirty += size; + else + mss->private_clean += size; + mss->pss += (u64)size << PSS_SHIFT; + } +} + +static void smaps_pte_entry(pte_t *pte, unsigned long addr, + struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; pgoff_t pgoff = linear_page_index(vma, addr); struct page *page = NULL; - int mapcount; - if (pte_present(ptent)) { - page = vm_normal_page(vma, addr, ptent); - } else if (is_swap_pte(ptent)) { - swp_entry_t swpent = pte_to_swp_entry(ptent); + if (pte_present(*pte)) { + page = vm_normal_page(vma, addr, *pte); + } else if (is_swap_pte(*pte)) { + swp_entry_t swpent = pte_to_swp_entry(*pte); if (!non_swap_entry(swpent)) - mss->swap += ptent_size; + mss->swap += PAGE_SIZE; else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); - } else if (pte_file(ptent)) { - if (pte_to_pgoff(ptent) != pgoff) - mss->nonlinear += ptent_size; + } else if (pte_file(*pte)) { + if (pte_to_pgoff(*pte) != pgoff) + mss->nonlinear += PAGE_SIZE; } if (!page) return; - if (PageAnon(page)) - mss->anonymous += ptent_size; - if (page->index != pgoff) - mss->nonlinear += ptent_size; + mss->nonlinear += PAGE_SIZE; - mss->resident += ptent_size; - /* Accumulate the size in pages that have been accessed. */ - if (pte_young(ptent) || PageReferenced(page)) - mss->referenced += ptent_size; - mapcount = page_mapcount(page); - if (mapcount >= 2) { - if (pte_dirty(ptent) || PageDirty(page)) - mss->shared_dirty += ptent_size; - else - mss->shared_clean += ptent_size; - mss->pss += (ptent_size << PSS_SHIFT) / mapcount; - } else { - if (pte_dirty(ptent) || PageDirty(page)) - mss->private_dirty += ptent_size; - else - mss->private_clean += ptent_size; - mss->pss += (ptent_size << PSS_SHIFT); - } + smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + struct mm_walk *walk) +{ + struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = mss->vma; + struct page *page; + + /* FOLL_DUMP will return -EFAULT on huge zero page */ + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + if (IS_ERR_OR_NULL(page)) + return; + mss->anonymous_thp += HPAGE_PMD_SIZE; + smaps_account(mss, page, HPAGE_PMD_SIZE, + pmd_young(*pmd), pmd_dirty(*pmd)); +} +#else +static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + struct mm_walk *walk) +{ +} +#endif + static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -509,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, spinlock_t *ptl; if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { - smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); - mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -524,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, */ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) - smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); + smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); cond_resched(); return 0; @@ -552,6 +584,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_DENYWRITE)] = "dw", +#ifdef CONFIG_X86_INTEL_MPX + [ilog2(VM_MPX)] = "mp", +#endif [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", |