diff options
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r-- | fs/proc/base.c | 447 |
1 files changed, 436 insertions, 11 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index a1dddda..8173dfd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -83,9 +83,11 @@ #include <linux/pid_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> +#include <linux/flex_array.h> #ifdef CONFIG_HARDWALL #include <asm/hardwall.h> #endif +#include <trace/events/oom.h> #include "internal.h" /* NOTE: @@ -133,6 +135,8 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) +static int proc_fd_permission(struct inode *inode, int mask); + /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -165,9 +169,9 @@ static int get_task_root(struct task_struct *task, struct path *root) return result; } -static int proc_cwd_link(struct inode *inode, struct path *path) +static int proc_cwd_link(struct dentry *dentry, struct path *path) { - struct task_struct *task = get_proc_task(inode); + struct task_struct *task = get_proc_task(dentry->d_inode); int result = -ENOENT; if (task) { @@ -182,9 +186,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path) return result; } -static int proc_root_link(struct inode *inode, struct path *path) +static int proc_root_link(struct dentry *dentry, struct path *path) { - struct task_struct *task = get_proc_task(inode); + struct task_struct *task = get_proc_task(dentry->d_inode); int result = -ENOENT; if (task) { @@ -627,6 +631,50 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +/* + * May current process learn task's sched/cmdline info (for hide_pid_min=1) + * or euid/egid (for hide_pid_min=2)? + */ +static bool has_pid_permissions(struct pid_namespace *pid, + struct task_struct *task, + int hide_pid_min) +{ + if (pid->hide_pid < hide_pid_min) + return true; + if (in_group_p(pid->pid_gid)) + return true; + return ptrace_may_access(task, PTRACE_MODE_READ); +} + + +static int proc_pid_permission(struct inode *inode, int mask) +{ + struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct task_struct *task; + bool has_perms; + + task = get_proc_task(inode); + has_perms = has_pid_permissions(pid, task, 1); + put_task_struct(task); + + if (!has_perms) { + if (pid->hide_pid == 2) { + /* + * Let's make getdents(), stat(), and open() + * consistent with each other. If a process + * may not stat() a file, it shouldn't be seen + * in procfs at all. + */ + return -ENOENT; + } + + return -EPERM; + } + return generic_permission(inode, mask); +} + + + static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; @@ -1010,6 +1058,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, else task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + trace_oom_score_adj_update(task); err_sighand: unlock_task_sighand(task, &flags); err_task_lock: @@ -1097,6 +1146,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, task->signal->oom_score_adj = oom_score_adj; if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = oom_score_adj; + trace_oom_score_adj_update(task); /* * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is * always attainable. @@ -1453,13 +1503,13 @@ static const struct file_operations proc_pid_set_comm_operations = { .release = single_release, }; -static int proc_exe_link(struct inode *inode, struct path *exe_path) +static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; struct mm_struct *mm; struct file *exe_file; - task = get_proc_task(inode); + task = get_proc_task(dentry->d_inode); if (!task) return -ENOENT; mm = get_task_mm(task); @@ -1489,7 +1539,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); + error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); out: return ERR_PTR(error); } @@ -1528,7 +1578,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; @@ -1609,6 +1659,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) struct inode *inode = dentry->d_inode; struct task_struct *task; const struct cred *cred; + struct pid_namespace *pid = dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1617,6 +1668,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = 0; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { + if (!has_pid_permissions(pid, task, 2)) { + rcu_read_unlock(); + /* + * This doesn't prevent learning whether PID exists, + * it only makes getattr() consistent with readdir(). + */ + return -ENOENT; + } if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { cred = __task_cred(task); @@ -1820,9 +1879,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) return -ENOENT; } -static int proc_fd_link(struct inode *inode, struct path *path) +static int proc_fd_link(struct dentry *dentry, struct path *path) { - return proc_fd_info(inode, path, NULL); + return proc_fd_info(dentry->d_inode, path, NULL); } static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -2043,6 +2102,355 @@ static const struct file_operations proc_fd_operations = { .llseek = default_llseek, }; +#ifdef CONFIG_CHECKPOINT_RESTORE + +/* + * dname_to_vma_addr - maps a dentry name into two unsigned longs + * which represent vma start and end addresses. + */ +static int dname_to_vma_addr(struct dentry *dentry, + unsigned long *start, unsigned long *end) +{ + if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) + return -EINVAL; + + return 0; +} + +static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + unsigned long vm_start, vm_end; + bool exact_vma_exists = false; + struct mm_struct *mm = NULL; + struct task_struct *task; + const struct cred *cred; + struct inode *inode; + int status = 0; + + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + if (!capable(CAP_SYS_ADMIN)) { + status = -EACCES; + goto out_notask; + } + + inode = dentry->d_inode; + task = get_proc_task(inode); + if (!task) + goto out_notask; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out; + + mm = get_task_mm(task); + if (!mm) + goto out; + + if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { + down_read(&mm->mmap_sem); + exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); + up_read(&mm->mmap_sem); + } + + mmput(mm); + + if (exact_vma_exists) { + if (task_dumpable(task)) { + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); + } else { + inode->i_uid = 0; + inode->i_gid = 0; + } + security_task_to_inode(task, inode); + status = 1; + } + +out: + put_task_struct(task); + +out_notask: + if (status <= 0) + d_drop(dentry); + + return status; +} + +static const struct dentry_operations tid_map_files_dentry_operations = { + .d_revalidate = map_files_d_revalidate, + .d_delete = pid_delete_dentry, +}; + +static int proc_map_files_get_link(struct dentry *dentry, struct path *path) +{ + unsigned long vm_start, vm_end; + struct vm_area_struct *vma; + struct task_struct *task; + struct mm_struct *mm; + int rc; + + rc = -ENOENT; + task = get_proc_task(dentry->d_inode); + if (!task) + goto out; + + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; + + rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); + if (rc) + goto out_mmput; + + down_read(&mm->mmap_sem); + vma = find_exact_vma(mm, vm_start, vm_end); + if (vma && vma->vm_file) { + *path = vma->vm_file->f_path; + path_get(path); + rc = 0; + } + up_read(&mm->mmap_sem); + +out_mmput: + mmput(mm); +out: + return rc; +} + +struct map_files_info { + struct file *file; + unsigned long len; + unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ +}; + +static struct dentry * +proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + const struct file *file = ptr; + struct proc_inode *ei; + struct inode *inode; + + if (!file) + return ERR_PTR(-ENOENT); + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + return ERR_PTR(-ENOENT); + + ei = PROC_I(inode); + ei->op.proc_get_link = proc_map_files_get_link; + + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + inode->i_mode = S_IFLNK; + + if (file->f_mode & FMODE_READ) + inode->i_mode |= S_IRUSR; + if (file->f_mode & FMODE_WRITE) + inode->i_mode |= S_IWUSR; + + d_set_d_op(dentry, &tid_map_files_dentry_operations); + d_add(dentry, inode); + + return NULL; +} + +static struct dentry *proc_map_files_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + unsigned long vm_start, vm_end; + struct vm_area_struct *vma; + struct task_struct *task; + struct dentry *result; + struct mm_struct *mm; + + result = ERR_PTR(-EACCES); + if (!capable(CAP_SYS_ADMIN)) + goto out; + + result = ERR_PTR(-ENOENT); + task = get_proc_task(dir); + if (!task) + goto out; + + result = ERR_PTR(-EACCES); + if (lock_trace(task)) + goto out_put_task; + + result = ERR_PTR(-ENOENT); + if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) + goto out_unlock; + + mm = get_task_mm(task); + if (!mm) + goto out_unlock; + + down_read(&mm->mmap_sem); + vma = find_exact_vma(mm, vm_start, vm_end); + if (!vma) + goto out_no_vma; + + result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); + +out_no_vma: + up_read(&mm->mmap_sem); + mmput(mm); +out_unlock: + unlock_trace(task); +out_put_task: + put_task_struct(task); +out: + return result; +} + +static const struct inode_operations proc_map_files_inode_operations = { + .lookup = proc_map_files_lookup, + .permission = proc_fd_permission, + .setattr = proc_setattr, +}; + +static int +proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct vm_area_struct *vma; + struct task_struct *task; + struct mm_struct *mm; + ino_t ino; + int ret; + + ret = -EACCES; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -ENOENT; + task = get_proc_task(inode); + if (!task) + goto out; + + ret = -EACCES; + if (lock_trace(task)) + goto out_put_task; + + ret = 0; + switch (filp->f_pos) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) + goto out_unlock; + filp->f_pos++; + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) + goto out_unlock; + filp->f_pos++; + default: + { + unsigned long nr_files, pos, i; + struct flex_array *fa = NULL; + struct map_files_info info; + struct map_files_info *p; + + mm = get_task_mm(task); + if (!mm) + goto out_unlock; + down_read(&mm->mmap_sem); + + nr_files = 0; + + /* + * We need two passes here: + * + * 1) Collect vmas of mapped files with mmap_sem taken + * 2) Release mmap_sem and instantiate entries + * + * otherwise we get lockdep complained, since filldir() + * routine might require mmap_sem taken in might_fault(). + */ + + for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { + if (vma->vm_file && ++pos > filp->f_pos) + nr_files++; + } + + if (nr_files) { + fa = flex_array_alloc(sizeof(info), nr_files, + GFP_KERNEL); + if (!fa || flex_array_prealloc(fa, 0, nr_files, + GFP_KERNEL)) { + ret = -ENOMEM; + if (fa) + flex_array_free(fa); + up_read(&mm->mmap_sem); + mmput(mm); + goto out_unlock; + } + for (i = 0, vma = mm->mmap, pos = 2; vma; + vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (++pos <= filp->f_pos) + continue; + + get_file(vma->vm_file); + info.file = vma->vm_file; + info.len = snprintf(info.name, + sizeof(info.name), "%lx-%lx", + vma->vm_start, vma->vm_end); + if (flex_array_put(fa, i++, &info, GFP_KERNEL)) + BUG(); + } + } + up_read(&mm->mmap_sem); + + for (i = 0; i < nr_files; i++) { + p = flex_array_get(fa, i); + ret = proc_fill_cache(filp, dirent, filldir, + p->name, p->len, + proc_map_files_instantiate, + task, p->file); + if (ret) + break; + filp->f_pos++; + fput(p->file); + } + for (; i < nr_files; i++) { + /* + * In case of error don't forget + * to put rest of file refs. + */ + p = flex_array_get(fa, i); + fput(p->file); + } + if (fa) + flex_array_free(fa); + mmput(mm); + } + } + +out_unlock: + unlock_trace(task); +out_put_task: + put_task_struct(task); +out: + return ret; +} + +static const struct file_operations proc_map_files_operations = { + .read = generic_read_dir, + .readdir = proc_map_files_readdir, + .llseek = default_llseek, +}; + +#endif /* CONFIG_CHECKPOINT_RESTORE */ + /* * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). @@ -2658,6 +3066,9 @@ static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), +#ifdef CONFIG_CHECKPOINT_RESTORE + DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), +#endif DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET @@ -2761,6 +3172,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) @@ -2964,6 +3376,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi proc_pid_instantiate, iter.task, NULL); } +static int fake_filldir(void *buf, const char *name, int namelen, + loff_t offset, u64 ino, unsigned d_type) +{ + return 0; +} + /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { @@ -2971,6 +3389,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) struct task_struct *reaper; struct tgid_iter iter; struct pid_namespace *ns; + filldir_t __filldir; if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) goto out_no_task; @@ -2992,8 +3411,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { + if (has_pid_permissions(ns, iter.task, 2)) + __filldir = filldir; + else + __filldir = fake_filldir; + filp->f_pos = iter.tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { put_task_struct(iter.task); goto out; } @@ -3328,6 +3752,7 @@ static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { |