From f9a00e8738c209d95493cf97d3a82ab2655892e5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Dec 2012 16:01:25 -0800 Subject: procfs: use kbasename() [yongjun_wei@trendmicro.com.cn: remove duplicated include] Signed-off-by: Andy Shevchenko Signed-off-by: Wei Yongjun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_devtree.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index df7dd08..de20ec4 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -195,11 +195,7 @@ void proc_device_tree_add_node(struct device_node *np, set_node_proc_entry(np, de); for (child = NULL; (child = of_get_next_child(np, child));) { /* Use everything after the last slash, or the full name */ - p = strrchr(child->full_name, '/'); - if (!p) - p = child->full_name; - else - ++p; + p = kbasename(child->full_name); if (duplicate_name(de, p)) p = fixup_name(np, de, p); -- cgit v1.1 From 7b9a7ec565505699f503b4fcf61500dceb36e744 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Mon, 17 Dec 2012 16:03:10 -0800 Subject: proc: don't show nonexistent capabilities Without this patch it is really hard to interpret a bounding set, if CAP_LAST_CAP is unknown for a current kernel. Non-existant capabilities can not be deleted from a bounding set with help of prctl. E.g.: Here are two examples without/with this patch. CapBnd: ffffffe0fdecffff CapBnd: 00000000fdecffff I suggest to hide non-existent capabilities. Here is two reasons. * It's logically and easier for using. * It helps to checkpoint-restore capabilities of tasks, because tasks can be restored on another kernel, where CAP_LAST_CAP is bigger. Signed-off-by: Andrew Vagin Cc: Andrew G. Morgan Reviewed-by: Serge E. Hallyn Cc: Pavel Emelyanov Reviewed-by: Kees Cook Cc: KAMEZAWA Hiroyuki Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/array.c b/fs/proc/array.c index d369670..377a373 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -308,6 +308,10 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_putc(m, '\n'); } +/* Remove non-existent capabilities */ +#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \ + CAP_TO_MASK(CAP_LAST_CAP + 1) - 1) + static inline void task_cap(struct seq_file *m, struct task_struct *p) { const struct cred *cred; @@ -321,6 +325,11 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) cap_bset = cred->cap_bset; rcu_read_unlock(); + NORM_CAPS(cap_inheritable); + NORM_CAPS(cap_permitted); + NORM_CAPS(cap_effective); + NORM_CAPS(cap_bset); + render_cap_t(m, "CapInh:\t", &cap_inheritable); render_cap_t(m, "CapPrm:\t", &cap_permitted); render_cap_t(m, "CapEff:\t", &cap_effective); -- cgit v1.1 From 834f82e2aa9a8ede94b17b656329f850c1471514 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 17 Dec 2012 16:03:13 -0800 Subject: procfs: add VmFlags field in smaps output During c/r sessions we've found that there is no way at the moment to fetch some VMA associated flags, such as mlock() and madvise(). This leads us to a problem -- we don't know if we should call for mlock() and/or madvise() after restore on the vma area we're bringing back to life. This patch intorduces a new field into "smaps" output called VmFlags, where all set flags associated with the particular VMA is shown as two letter mnemonics. [ Strictly speaking for c/r we only need mlock/madvise bits but it has been said that providing just a few flags looks somehow inconsistent. So all flags are here now. ] This feature is made available on CONFIG_CHECKPOINT_RESTORE=n kernels, as other applications may start to use these fields. The data is encoded in a somewhat awkward two letters mnemonic form, to encourage userspace to be prepared for fields being added or removed in the future. [a.p.zijlstra@chello.nl: props to use for_each_set_bit] [sfr@canb.auug.org.au: props to use array instead of struct] [akpm@linux-foundation.org: overall redesign and simplification] [akpm@linux-foundation.org: remove unneeded braces per sfr, avoid using bloaty for_each_set_bit()] Signed-off-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Peter Zijlstra Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4877562..448455b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -526,6 +526,57 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return 0; } +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) +{ + /* + * Don't forget to update Documentation/ on changes. + */ + static const char mnemonics[BITS_PER_LONG][2] = { + /* + * In case if we meet a flag we don't know about. + */ + [0 ... (BITS_PER_LONG-1)] = "??", + + [ilog2(VM_READ)] = "rd", + [ilog2(VM_WRITE)] = "wr", + [ilog2(VM_EXEC)] = "ex", + [ilog2(VM_SHARED)] = "sh", + [ilog2(VM_MAYREAD)] = "mr", + [ilog2(VM_MAYWRITE)] = "mw", + [ilog2(VM_MAYEXEC)] = "me", + [ilog2(VM_MAYSHARE)] = "ms", + [ilog2(VM_GROWSDOWN)] = "gd", + [ilog2(VM_PFNMAP)] = "pf", + [ilog2(VM_DENYWRITE)] = "dw", + [ilog2(VM_LOCKED)] = "lo", + [ilog2(VM_IO)] = "io", + [ilog2(VM_SEQ_READ)] = "sr", + [ilog2(VM_RAND_READ)] = "rr", + [ilog2(VM_DONTCOPY)] = "dc", + [ilog2(VM_DONTEXPAND)] = "de", + [ilog2(VM_ACCOUNT)] = "ac", + [ilog2(VM_NORESERVE)] = "nr", + [ilog2(VM_HUGETLB)] = "ht", + [ilog2(VM_NONLINEAR)] = "nl", + [ilog2(VM_ARCH_1)] = "ar", + [ilog2(VM_DONTDUMP)] = "dd", + [ilog2(VM_MIXEDMAP)] = "mm", + [ilog2(VM_HUGEPAGE)] = "hg", + [ilog2(VM_NOHUGEPAGE)] = "nh", + [ilog2(VM_MERGEABLE)] = "mg", + }; + size_t i; + + seq_puts(m, "VmFlags: "); + for (i = 0; i < BITS_PER_LONG; i++) { + if (vma->vm_flags & (1UL << i)) { + seq_printf(m, "%c%c ", + mnemonics[i][0], mnemonics[i][1]); + } + } + seq_putc(m, '\n'); +} + static int show_smap(struct seq_file *m, void *v, int is_pid) { struct proc_maps_private *priv = m->private; @@ -581,6 +632,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) seq_printf(m, "Nonlinear: %8lu kB\n", mss.nonlinear >> 10); + show_smap_vma_flags(m, vma); + if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task->mm)) ? vma->vm_start : 0; -- cgit v1.1 From 2f4b3bf6b2318cfaa177ec5a802f4d8d6afbd816 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 Dec 2012 16:03:14 -0800 Subject: /proc/pid/status: add "Seccomp" field It is currently impossible to examine the state of seccomp for a given process. While attaching with gdb and attempting "call prctl(PR_GET_SECCOMP,...)" will work with some situations, it is not reliable. If the process is in seccomp mode 1, this query will kill the process (prctl not allowed), if the process is in mode 2 with prctl not allowed, it will similarly be killed, and in weird cases, if prctl is filtered to return errno 0, it can look like seccomp is disabled. When reviewing the state of running processes, there should be a way to externally examine the seccomp mode. ("Did this build of Chrome end up using seccomp?" "Did my distro ship ssh with seccomp enabled?") This adds the "Seccomp" line to /proc/$pid/status. Signed-off-by: Kees Cook Reviewed-by: Cyrill Gorcunov Cc: Andrea Arcangeli Cc: James Morris Acked-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/array.c b/fs/proc/array.c index 377a373..077235f 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -336,6 +336,13 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) render_cap_t(m, "CapBnd:\t", &cap_bset); } +static inline void task_seccomp(struct seq_file *m, struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); +#endif +} + static inline void task_context_switch_counts(struct seq_file *m, struct task_struct *p) { @@ -369,6 +376,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, } task_sig(m, task); task_cap(m, task); + task_seccomp(m, task); task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); task_context_switch_counts(m, task); -- cgit v1.1 From 8d238027b87e654be552eabdf492042a34c5c300 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 17 Dec 2012 16:03:17 -0800 Subject: proc: pid/status: show all supplementary groups We display a list of supplementary group for each process in /proc//status. However, we show only the first 32 groups, not all of them. Although this is rare, but sometimes processes do have more than 32 supplementary groups, and this kernel limitation breaks user-space apps that rely on the group list in /proc//status. Number 32 comes from the internal NGROUPS_SMALL macro which defines the length for the internal kernel "small" groups buffer. There is no apparent reason to limit to this value. This patch removes the 32 groups printing limit. The Linux kernel limits the amount of supplementary groups by NGROUPS_MAX, which is currently set to 65536. And this is the maximum count of groups we may possibly print. Signed-off-by: Artem Bityutskiy Acked-by: Serge E. Hallyn Acked-by: Kees Cook Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/proc') diff --git a/fs/proc/array.c b/fs/proc/array.c index 077235f..439544f 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -212,7 +212,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, group_info = cred->group_info; task_unlock(p); - for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) + for (g = 0; g < group_info->ngroups; g++) seq_printf(m, "%d ", from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); -- cgit v1.1 From 55985dd72ab27b47530dcc8bdddd28b69f4abe8b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 17 Dec 2012 16:04:55 -0800 Subject: procfs: add ability to plug in auxiliary fdinfo providers This patch brings ability to print out auxiliary data associated with file in procfs interface /proc/pid/fdinfo/fd. In particular further patches make eventfd, evenpoll, signalfd and fsnotify to print additional information complete enough to restore these objects after checkpoint. To simplify the code we add show_fdinfo callback inside struct file_operations (as Al and Pavel are proposing). Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: Andrey Vagin Cc: Al Viro Cc: Alexey Dobriyan Cc: James Bottomley Cc: "Aneesh Kumar K.V" Cc: Alexey Dobriyan Cc: Matthew Helsley Cc: "J. Bruce Fields" Cc: "Aneesh Kumar K.V" Cc: Tvrtko Ursulin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/fd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/fd.c b/fs/proc/fd.c index f28a875..d7a4a28 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -50,6 +50,8 @@ static int seq_show(struct seq_file *m, void *v) if (!ret) { seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", (long long)file->f_pos, f_flags); + if (file->f_op->show_fdinfo) + ret = file->f_op->show_fdinfo(m, file); fput(file); } -- cgit v1.1 From 138d22b58696c506799f8de759804083ff9effae Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 17 Dec 2012 16:05:02 -0800 Subject: fs, epoll: add procfs fdinfo helper This allows us to print out eventpoll target file descriptor, events and data, the /proc/pid/fdinfo/fd consists of | pos: 0 | flags: 02 | tfd: 5 events: 1d data: ffffffffffffffff enabled: 1 [avagin@: fix for unitialized ret variable] Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: Andrey Vagin Cc: Al Viro Cc: Alexey Dobriyan Cc: James Bottomley Cc: "Aneesh Kumar K.V" Cc: Alexey Dobriyan Cc: Matthew Helsley Cc: "J. Bruce Fields" Cc: "Aneesh Kumar K.V" Cc: Tvrtko Ursulin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/proc') diff --git a/fs/proc/array.c b/fs/proc/array.c index 439544f..060a56a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -220,7 +220,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, seq_putc(m, '\n'); } -static void render_sigset_t(struct seq_file *m, const char *header, +void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set) { int i; -- cgit v1.1