Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Makefile        |   14
-rw-r--r-- | fs/proc/array.c         |  484
-rw-r--r-- | fs/proc/base.c          | 2056
-rw-r--r-- | fs/proc/generic.c       |  705
-rw-r--r-- | fs/proc/inode-alloc.txt |   14
-rw-r--r-- | fs/proc/inode.c         |  218
-rw-r--r-- | fs/proc/internal.h      |   48
-rw-r--r-- | fs/proc/kcore.c         |  404
-rw-r--r-- | fs/proc/kmsg.c          |   55
-rw-r--r-- | fs/proc/mmu.c           |   67
-rw-r--r-- | fs/proc/nommu.c         |  135
-rw-r--r-- | fs/proc/proc_devtree.c  |  165
-rw-r--r-- | fs/proc/proc_misc.c     |  615
-rw-r--r-- | fs/proc/proc_tty.c      |  242
-rw-r--r-- | fs/proc/root.c          |  161
-rw-r--r-- | fs/proc/task_mmu.c      |  235
-rw-r--r-- | fs/proc/task_nommu.c    |  164
17 files changed, 5782 insertions, 0 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile new file mode 100644 index 0000000..738b9b6 --- /dev/null +++ b/fs/proc/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for the Linux proc filesystem routines. +# + +obj-$(CONFIG_PROC_FS) += proc.o + +proc-y := nommu.o task_nommu.o +proc-$(CONFIG_MMU) := mmu.o task_mmu.o + +proc-y += inode.o root.o base.o generic.o array.o \ + kmsg.o proc_tty.o proc_misc.o + +proc-$(CONFIG_PROC_KCORE) += kcore.o +proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff --git a/fs/proc/array.c b/fs/proc/array.c new file mode 100644 index 0000000..37668fe --- /dev/null +++ b/fs/proc/array.c @@ -0,0 +1,484 @@ +/* + * linux/fs/proc/array.c + * + * Copyright (C) 1992 by Linus Torvalds + * based on ideas by Darren Senn + * + * Fixes: + * Michael. K. Johnson: stat,statm extensions. + * <johnsonm@stolaf.edu> + * + * Pauline Middelink : Made cmdline,envline only break at '\0's, to + * make sure SET_PROCTITLE works. Also removed + * bad '!' which forced address recalculation for + * EVERY character on the current page. + * <middelin@polyware.iaf.nl> + * + * Danny ter Haar : added cpuinfo + * <dth@cistron.nl> + * + * Alessandro Rubini : profile extension. + * <rubini@ipvvis.unipv.it> + * + * Jeff Tranter : added BogoMips field to cpuinfo + * <Jeff_Tranter@Mitel.COM> + * + * Bruno Haible : remove 4K limit for the maps file + * <haible@ma2s2.mathematik.uni-karlsruhe.de> + * + * Yves Arrouye : remove removal of trailing spaces in get_array. + * <Yves.Arrouye@marin.fdn.fr> + * + * Jerome Forissier : added per-CPU time information to /proc/stat + * and /proc/<pid>/cpu extension + * <forissier@isia.cma.fr> + * - Incorporation and non-SMP safe operation + * of forissier patch in 2.1.78 by + * Hans Marcus <crowbar@concepts.nl> + * + * aeb@cwi.nl : /proc/partitions + * + * + * Alan Cox : security fixes. + * <Alan.Cox@linux.org> + * + * Al Viro : safe handling of mm_struct + * + * Gerhard Wichert : added BIGMEM support + * Siemens AG <Gerhard.Wichert@pdb.siemens.de> + * + * Al Viro & Jeff Garzik : moved most of the thing into base.c and + * : proc_misc.c. The rest may eventually go into + * : base.c too. 
+ */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/highmem.h> +#include <linux/file.h> +#include <linux/times.h> +#include <linux/cpuset.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/processor.h> +#include "internal.h" + +/* Gcc optimizes away "strlen(x)" for constant x */ +#define ADDBUF(buffer, string) \ +do { memcpy(buffer, string, strlen(string)); \ + buffer += strlen(string); } while (0) + +static inline char * task_name(struct task_struct *p, char * buf) +{ + int i; + char * name; + char tcomm[sizeof(p->comm)]; + + get_task_comm(tcomm, p); + + ADDBUF(buf, "Name:\t"); + name = tcomm; + i = sizeof(tcomm); + do { + unsigned char c = *name; + name++; + i--; + *buf = c; + if (!c) + break; + if (c == '\\') { + buf[1] = c; + buf += 2; + continue; + } + if (c == '\n') { + buf[0] = '\\'; + buf[1] = 'n'; + buf += 2; + continue; + } + buf++; + } while (i); + *buf = '\n'; + return buf+1; +} + +/* + * The task state array is a strange "bitmap" of + * reasons to sleep. Thus "running" is zero, and + * you can test for combinations of others with + * simple bit tests. + */ +static const char *task_state_array[] = { + "R (running)", /* 0 */ + "S (sleeping)", /* 1 */ + "D (disk sleep)", /* 2 */ + "T (stopped)", /* 4 */ + "T (tracing stop)", /* 8 */ + "Z (zombie)", /* 16 */ + "X (dead)" /* 32 */ +}; + +static inline const char * get_task_state(struct task_struct *tsk) +{ + unsigned int state = (tsk->state & (TASK_RUNNING | + TASK_INTERRUPTIBLE | + TASK_UNINTERRUPTIBLE | + TASK_STOPPED | + TASK_TRACED)) | + (tsk->exit_state & (EXIT_ZOMBIE | + EXIT_DEAD)); + const char **p = &task_state_array[0]; + + while (state) { + p++; + state >>= 1; + } + return *p; +} + +static inline char * task_state(struct task_struct *p, char *buffer) +{ + struct group_info *group_info; + int g; + + read_lock(&tasklist_lock); + buffer += sprintf(buffer, + "State:\t%s\n" + "SleepAVG:\t%lu%%\n" + "Tgid:\t%d\n" + "Pid:\t%d\n" + "PPid:\t%d\n" + "TracerPid:\t%d\n" + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n", + get_task_state(p), + (p->sleep_avg/1024)*100/(1020000000/1024), + p->tgid, + p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, + pid_alive(p) && p->ptrace ? p->parent->pid : 0, + p->uid, p->euid, p->suid, p->fsuid, + p->gid, p->egid, p->sgid, p->fsgid); + read_unlock(&tasklist_lock); + task_lock(p); + buffer += sprintf(buffer, + "FDSize:\t%d\n" + "Groups:\t", + p->files ? 
p->files->max_fds : 0); + + group_info = p->group_info; + get_group_info(group_info); + task_unlock(p); + + for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++) + buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g)); + put_group_info(group_info); + + buffer += sprintf(buffer, "\n"); + return buffer; +} + +static char * render_sigset_t(const char *header, sigset_t *set, char *buffer) +{ + int i, len; + + len = strlen(header); + memcpy(buffer, header, len); + buffer += len; + + i = _NSIG; + do { + int x = 0; + + i -= 4; + if (sigismember(set, i+1)) x |= 1; + if (sigismember(set, i+2)) x |= 2; + if (sigismember(set, i+3)) x |= 4; + if (sigismember(set, i+4)) x |= 8; + *buffer++ = (x < 10 ? '0' : 'a' - 10) + x; + } while (i >= 4); + + *buffer++ = '\n'; + *buffer = 0; + return buffer; +} + +static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, + sigset_t *catch) +{ + struct k_sigaction *k; + int i; + + k = p->sighand->action; + for (i = 1; i <= _NSIG; ++i, ++k) { + if (k->sa.sa_handler == SIG_IGN) + sigaddset(ign, i); + else if (k->sa.sa_handler != SIG_DFL) + sigaddset(catch, i); + } +} + +static inline char * task_sig(struct task_struct *p, char *buffer) +{ + sigset_t pending, shpending, blocked, ignored, caught; + int num_threads = 0; + unsigned long qsize = 0; + unsigned long qlim = 0; + + sigemptyset(&pending); + sigemptyset(&shpending); + sigemptyset(&blocked); + sigemptyset(&ignored); + sigemptyset(&caught); + + /* Gather all the data with the appropriate locks held */ + read_lock(&tasklist_lock); + if (p->sighand) { + spin_lock_irq(&p->sighand->siglock); + pending = p->pending.signal; + shpending = p->signal->shared_pending.signal; + blocked = p->blocked; + collect_sigign_sigcatch(p, &ignored, &caught); + num_threads = atomic_read(&p->signal->count); + qsize = atomic_read(&p->user->sigpending); + qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; + spin_unlock_irq(&p->sighand->siglock); + } + read_unlock(&tasklist_lock); + + buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); + buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); + + /* render them all */ + buffer = render_sigset_t("SigPnd:\t", &pending, buffer); + buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer); + buffer = render_sigset_t("SigBlk:\t", &blocked, buffer); + buffer = render_sigset_t("SigIgn:\t", &ignored, buffer); + buffer = render_sigset_t("SigCgt:\t", &caught, buffer); + + return buffer; +} + +static inline char *task_cap(struct task_struct *p, char *buffer) +{ + return buffer + sprintf(buffer, "CapInh:\t%016x\n" + "CapPrm:\t%016x\n" + "CapEff:\t%016x\n", + cap_t(p->cap_inheritable), + cap_t(p->cap_permitted), + cap_t(p->cap_effective)); +} + +int proc_pid_status(struct task_struct *task, char * buffer) +{ + char * orig = buffer; + struct mm_struct *mm = get_task_mm(task); + + buffer = task_name(task, buffer); + buffer = task_state(task, buffer); + + if (mm) { + buffer = task_mem(mm, buffer); + mmput(mm); + } + buffer = task_sig(task, buffer); + buffer = task_cap(task, buffer); + buffer = cpuset_task_status_allowed(task, buffer); +#if defined(CONFIG_ARCH_S390) + buffer = task_show_regs(task, buffer); +#endif + return buffer - orig; +} + +static int do_task_stat(struct task_struct *task, char * buffer, int whole) +{ + unsigned long vsize, eip, esp, wchan = ~0UL; + long priority, nice; + int tty_pgrp = -1, tty_nr = 0; + sigset_t sigign, sigcatch; + char state; + int res; + pid_t ppid, pgid = -1, sid = -1; + int num_threads = 0; + struct mm_struct *mm; + unsigned long 
long start_time; + unsigned long cmin_flt = 0, cmaj_flt = 0; + unsigned long min_flt = 0, maj_flt = 0; + cputime_t cutime, cstime, utime, stime; + unsigned long rsslim = 0; + unsigned long it_real_value = 0; + struct task_struct *t; + char tcomm[sizeof(task->comm)]; + + state = *get_task_state(task); + vsize = eip = esp = 0; + mm = get_task_mm(task); + if (mm) { + vsize = task_vsize(mm); + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + } + + get_task_comm(tcomm, task); + + sigemptyset(&sigign); + sigemptyset(&sigcatch); + cutime = cstime = utime = stime = cputime_zero; + read_lock(&tasklist_lock); + if (task->sighand) { + spin_lock_irq(&task->sighand->siglock); + num_threads = atomic_read(&task->signal->count); + collect_sigign_sigcatch(task, &sigign, &sigcatch); + + /* add up live thread stats at the group level */ + if (whole) { + t = task; + do { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + utime = cputime_add(utime, t->utime); + stime = cputime_add(stime, t->stime); + t = next_thread(t); + } while (t != task); + } + + spin_unlock_irq(&task->sighand->siglock); + } + if (task->signal) { + if (task->signal->tty) { + tty_pgrp = task->signal->tty->pgrp; + tty_nr = new_encode_dev(tty_devnum(task->signal->tty)); + } + pgid = process_group(task); + sid = task->signal->session; + cmin_flt = task->signal->cmin_flt; + cmaj_flt = task->signal->cmaj_flt; + cutime = task->signal->cutime; + cstime = task->signal->cstime; + rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur; + if (whole) { + min_flt += task->signal->min_flt; + maj_flt += task->signal->maj_flt; + utime = cputime_add(utime, task->signal->utime); + stime = cputime_add(stime, task->signal->stime); + } + it_real_value = task->signal->it_real_value; + } + ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; + read_unlock(&tasklist_lock); + + if (!whole || num_threads<2) + wchan = get_wchan(task); + if (!whole) { + min_flt = task->min_flt; + maj_flt = task->maj_flt; + utime = task->utime; + stime = task->stime; + } + + /* scale priority and nice values from timeslices to -20..20 */ + /* to make it look like a "normal" Unix priority/nice value */ + priority = task_prio(task); + nice = task_nice(task); + + /* Temporary variable needed for gcc-2.96 */ + /* convert timespec -> nsec*/ + start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC + + task->start_time.tv_nsec; + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); + + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ +%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", + task->pid, + tcomm, + state, + ppid, + pgid, + sid, + tty_nr, + tty_pgrp, + task->flags, + min_flt, + cmin_flt, + maj_flt, + cmaj_flt, + cputime_to_clock_t(utime), + cputime_to_clock_t(stime), + cputime_to_clock_t(cutime), + cputime_to_clock_t(cstime), + priority, + nice, + num_threads, + jiffies_to_clock_t(it_real_value), + start_time, + vsize, + mm ? get_mm_counter(mm, rss) : 0, /* you might want to shift this left 3 */ + rsslim, + mm ? mm->start_code : 0, + mm ? mm->end_code : 0, + mm ? mm->start_stack : 0, + esp, + eip, + /* The signal information here is obsolete. + * It must be decimal for Linux 2.0 compatibility. + * Use /proc/#/status for real-time signals. 
+ */ + task->pending.signal.sig[0] & 0x7fffffffUL, + task->blocked.sig[0] & 0x7fffffffUL, + sigign .sig[0] & 0x7fffffffUL, + sigcatch .sig[0] & 0x7fffffffUL, + wchan, + 0UL, + 0UL, + task->exit_signal, + task_cpu(task), + task->rt_priority, + task->policy); + if(mm) + mmput(mm); + return res; +} + +int proc_tid_stat(struct task_struct *task, char * buffer) +{ + return do_task_stat(task, buffer, 0); +} + +int proc_tgid_stat(struct task_struct *task, char * buffer) +{ + return do_task_stat(task, buffer, 1); +} + +int proc_pid_statm(struct task_struct *task, char *buffer) +{ + int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; + struct mm_struct *mm = get_task_mm(task); + + if (mm) { + size = task_statm(mm, &shared, &text, &data, &resident); + mmput(mm); + } + + return sprintf(buffer,"%d %d %d %d %d %d %d\n", + size, resident, shared, text, lib, data, 0); +} diff --git a/fs/proc/base.c b/fs/proc/base.c new file mode 100644 index 0000000..dad8ea4 --- /dev/null +++ b/fs/proc/base.c @@ -0,0 +1,2056 @@ +/* + * linux/fs/proc/base.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * proc base directory handling functions + * + * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. + * Instead of using magical inumbers to determine the kind of object + * we allocate and fill in-core inodes upon lookup. They don't even + * go into icache. We cache the reference to task_struct upon lookup too. + * Eventually it should become a filesystem in its own. We don't use the + * rest of procfs anymore. + */ + +#include <asm/uaccess.h> + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/init.h> +#include <linux/file.h> +#include <linux/string.h> +#include <linux/seq_file.h> +#include <linux/namei.h> +#include <linux/namespace.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/kallsyms.h> +#include <linux/mount.h> +#include <linux/security.h> +#include <linux/ptrace.h> +#include <linux/seccomp.h> +#include <linux/cpuset.h> +#include <linux/audit.h> +#include "internal.h" + +/* + * For hysterical raisins we keep the same inumbers as in the old procfs. + * Feel free to change the macro below - just keep the range distinct from + * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). + * As soon as we'll get a separate superblock we will be able to forget + * about magical ranges too. 
+ */ + +#define fake_ino(pid,ino) (((pid)<<16)|(ino)) + +enum pid_directory_inos { + PROC_TGID_INO = 2, + PROC_TGID_TASK, + PROC_TGID_STATUS, + PROC_TGID_MEM, +#ifdef CONFIG_SECCOMP + PROC_TGID_SECCOMP, +#endif + PROC_TGID_CWD, + PROC_TGID_ROOT, + PROC_TGID_EXE, + PROC_TGID_FD, + PROC_TGID_ENVIRON, + PROC_TGID_AUXV, + PROC_TGID_CMDLINE, + PROC_TGID_STAT, + PROC_TGID_STATM, + PROC_TGID_MAPS, + PROC_TGID_MOUNTS, + PROC_TGID_WCHAN, +#ifdef CONFIG_SCHEDSTATS + PROC_TGID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS + PROC_TGID_CPUSET, +#endif +#ifdef CONFIG_SECURITY + PROC_TGID_ATTR, + PROC_TGID_ATTR_CURRENT, + PROC_TGID_ATTR_PREV, + PROC_TGID_ATTR_EXEC, + PROC_TGID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_AUDITSYSCALL + PROC_TGID_LOGINUID, +#endif + PROC_TGID_FD_DIR, + PROC_TGID_OOM_SCORE, + PROC_TGID_OOM_ADJUST, + PROC_TID_INO, + PROC_TID_STATUS, + PROC_TID_MEM, +#ifdef CONFIG_SECCOMP + PROC_TID_SECCOMP, +#endif + PROC_TID_CWD, + PROC_TID_ROOT, + PROC_TID_EXE, + PROC_TID_FD, + PROC_TID_ENVIRON, + PROC_TID_AUXV, + PROC_TID_CMDLINE, + PROC_TID_STAT, + PROC_TID_STATM, + PROC_TID_MAPS, + PROC_TID_MOUNTS, + PROC_TID_WCHAN, +#ifdef CONFIG_SCHEDSTATS + PROC_TID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS + PROC_TID_CPUSET, +#endif +#ifdef CONFIG_SECURITY + PROC_TID_ATTR, + PROC_TID_ATTR_CURRENT, + PROC_TID_ATTR_PREV, + PROC_TID_ATTR_EXEC, + PROC_TID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_AUDITSYSCALL + PROC_TID_LOGINUID, +#endif + PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ + PROC_TID_OOM_SCORE, + PROC_TID_OOM_ADJUST, +}; + +struct pid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct pid_entry tgid_base_stuff[] = { + E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), + E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), + E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), + E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), + E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), + E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), + E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), + E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), +#ifdef CONFIG_SECCOMP + E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif + E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), + E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), + E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), + E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_SECURITY + E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), +#endif +#ifdef CONFIG_KALLSYMS + E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS + E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS + E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), +#endif + E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), + E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL + E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), +#endif + {0,0,NULL,0} +}; +static struct pid_entry tid_base_stuff[] = { + E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), + E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), + E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), + E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), + E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), + E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), + E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 
+#ifdef CONFIG_SECCOMP + E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif + E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), + E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), + E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), + E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_SECURITY + E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), +#endif +#ifdef CONFIG_KALLSYMS + E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS + E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS + E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), +#endif + E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), + E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL + E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), +#endif + {0,0,NULL,0} +}; + +#ifdef CONFIG_SECURITY +static struct pid_entry tgid_attr_stuff[] = { + E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), + E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), + {0,0,NULL,0} +}; +static struct pid_entry tid_attr_stuff[] = { + E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), + E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), + {0,0,NULL,0} +}; +#endif + +#undef E + +static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct task_struct *task = proc_task(inode); + struct files_struct *files; + struct file *file; + int fd = proc_type(inode) - PROC_TID_FD_DIR; + + files = get_files_struct(task); + if (files) { + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { + *mnt = mntget(file->f_vfsmnt); + *dentry = dget(file->f_dentry); + spin_unlock(&files->file_lock); + put_files_struct(files); + return 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + return -ENOENT; +} + +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + task_lock(proc_task(inode)); + fs = proc_task(inode)->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(proc_task(inode)); + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->pwdmnt); + *dentry = dget(fs->pwd); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + +static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + task_lock(proc_task(inode)); + fs = proc_task(inode)->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(proc_task(inode)); + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + +#define MAY_PTRACE(task) \ + (task == current || \ + (task->parent == current && \ + (task->ptrace & PT_PTRACED) && \ + (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ + security_ptrace(current,task) == 0)) + +static int may_ptrace_attach(struct task_struct *task) +{ + int retval = 0; + + task_lock(task); + + if (!task->mm) + goto out; + if (((current->uid != task->euid) || + (current->uid != task->suid) || + (current->uid != task->uid) || + (current->gid != task->egid) || + (current->gid != task->sgid) || + 
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + goto out; + rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + goto out; + if (security_ptrace(current, task)) + goto out; + + retval = 1; +out: + task_unlock(task); + return retval; +} + +static int proc_pid_environ(struct task_struct *task, char * buffer) +{ + int res = 0; + struct mm_struct *mm = get_task_mm(task); + if (mm) { + unsigned int len = mm->env_end - mm->env_start; + if (len > PAGE_SIZE) + len = PAGE_SIZE; + res = access_process_vm(task, mm->env_start, buffer, len, 0); + if (!may_ptrace_attach(task)) + res = -ESRCH; + mmput(mm); + } + return res; +} + +static int proc_pid_cmdline(struct task_struct *task, char * buffer) +{ + int res = 0; + unsigned int len; + struct mm_struct *mm = get_task_mm(task); + if (!mm) + goto out; + if (!mm->arg_end) + goto out_mm; /* Shh! No looking before we're done */ + + len = mm->arg_end - mm->arg_start; + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + + res = access_process_vm(task, mm->arg_start, buffer, len, 0); + + // If the nul at the end of args has been overwritten, then + // assume application is using setproctitle(3). + if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { + len = strnlen(buffer, res); + if (len < res) { + res = len; + } else { + len = mm->env_end - mm->env_start; + if (len > PAGE_SIZE - res) + len = PAGE_SIZE - res; + res += access_process_vm(task, mm->env_start, buffer+res, len, 0); + res = strnlen(buffer, res); + } + } +out_mm: + mmput(mm); +out: + return res; +} + +static int proc_pid_auxv(struct task_struct *task, char *buffer) +{ + int res = 0; + struct mm_struct *mm = get_task_mm(task); + if (mm) { + unsigned int nwords = 0; + do + nwords += 2; + while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ + res = nwords * sizeof(mm->saved_auxv[0]); + if (res > PAGE_SIZE) + res = PAGE_SIZE; + memcpy(buffer, mm->saved_auxv, res); + mmput(mm); + } + return res; +} + + +#ifdef CONFIG_KALLSYMS +/* + * Provides a wchan file via kallsyms in a proper one-value-per-file format. + * Returns the resolved symbol. If that fails, simply return the address. 
+ */ +static int proc_pid_wchan(struct task_struct *task, char *buffer) +{ + char *modname; + const char *sym_name; + unsigned long wchan, size, offset; + char namebuf[KSYM_NAME_LEN+1]; + + wchan = get_wchan(task); + + sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); + if (sym_name) + return sprintf(buffer, "%s", sym_name); + return sprintf(buffer, "%lu", wchan); +} +#endif /* CONFIG_KALLSYMS */ + +#ifdef CONFIG_SCHEDSTATS +/* + * Provides /proc/PID/schedstat + */ +static int proc_pid_schedstat(struct task_struct *task, char *buffer) +{ + return sprintf(buffer, "%lu %lu %lu\n", + task->sched_info.cpu_time, + task->sched_info.run_delay, + task->sched_info.pcnt); +} +#endif + +/* The badness from the OOM killer */ +unsigned long badness(struct task_struct *p, unsigned long uptime); +static int proc_oom_score(struct task_struct *task, char *buffer) +{ + unsigned long points; + struct timespec uptime; + + do_posix_clock_monotonic_gettime(&uptime); + points = badness(task, uptime.tv_sec); + return sprintf(buffer, "%lu\n", points); +} + +/************************************************************************/ +/* Here the fs part begins */ +/************************************************************************/ + +/* permission checks */ + +static int proc_check_root(struct inode *inode) +{ + struct dentry *de, *base, *root; + struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; + int res = 0; + + if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ + return -ENOENT; + read_lock(¤t->fs->lock); + our_vfsmnt = mntget(current->fs->rootmnt); + base = dget(current->fs->root); + read_unlock(¤t->fs->lock); + + spin_lock(&vfsmount_lock); + de = root; + mnt = vfsmnt; + + while (vfsmnt != our_vfsmnt) { + if (vfsmnt == vfsmnt->mnt_parent) + goto out; + de = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; + } + + if (!is_subdir(de, base)) + goto out; + spin_unlock(&vfsmount_lock); + +exit: + dput(base); + mntput(our_vfsmnt); + dput(root); + mntput(mnt); + return res; +out: + spin_unlock(&vfsmount_lock); + res = -EACCES; + goto exit; +} + +static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (generic_permission(inode, mask, NULL) != 0) + return -EACCES; + return proc_check_root(inode); +} + +extern struct seq_operations proc_pid_maps_op; +static int maps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +extern struct seq_operations mounts_op; +static int mounts_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &mounts_op); + + if (!ret) { + struct seq_file *m = file->private_data; + struct namespace *namespace; + task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + + if (namespace) + m->private = namespace; + else { + seq_release(inode, file); + ret = -EINVAL; + } + } + return ret; +} + +static int mounts_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct namespace *namespace = m->private; + put_namespace(namespace); + return seq_release(inode, file); +} + +static struct file_operations proc_mounts_operations = { + .open = 
mounts_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mounts_release, +}; + +#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ + +static ssize_t proc_info_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length; + struct task_struct *task = proc_task(inode); + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + length = PROC_I(inode)->op.proc_read(task, (char*)page); + + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); + free_page(page); + return length; +} + +static struct file_operations proc_info_file_operations = { + .read = proc_info_read, +}; + +static int mem_open(struct inode* inode, struct file* file) +{ + file->private_data = (void*)((long)current->self_exec_id); + return 0; +} + +static ssize_t mem_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = proc_task(file->f_dentry->d_inode); + char *page; + unsigned long src = *ppos; + int ret = -ESRCH; + struct mm_struct *mm; + + if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + goto out; + + ret = -ENOMEM; + page = (char *)__get_free_page(GFP_USER); + if (!page) + goto out; + + ret = 0; + + mm = get_task_mm(task); + if (!mm) + goto out_free; + + ret = -EIO; + + if (file->private_data != (void*)((long)current->self_exec_id)) + goto out_put; + + ret = 0; + + while (count > 0) { + int this_len, retval; + + this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; + retval = access_process_vm(task, src, page, this_len, 0); + if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { + if (!ret) + ret = -EIO; + break; + } + + if (copy_to_user(buf, page, retval)) { + ret = -EFAULT; + break; + } + + ret += retval; + src += retval; + buf += retval; + count -= retval; + } + *ppos = src; + +out_put: + mmput(mm); +out_free: + free_page((unsigned long) page); +out: + return ret; +} + +#define mem_write NULL + +#ifndef mem_write +/* This is a security hazard */ +static ssize_t mem_write(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + int copied = 0; + char *page; + struct task_struct *task = proc_task(file->f_dentry->d_inode); + unsigned long dst = *ppos; + + if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + return -ESRCH; + + page = (char *)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + + while (count > 0) { + int this_len, retval; + + this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; + if (copy_from_user(page, buf, this_len)) { + copied = -EFAULT; + break; + } + retval = access_process_vm(task, dst, page, this_len, 1); + if (!retval) { + if (!copied) + copied = -EIO; + break; + } + copied += retval; + buf += retval; + dst += retval; + count -= retval; + } + *ppos = dst; + free_page((unsigned long) page); + return copied; +} +#endif + +static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +{ + switch (orig) { + case 0: + file->f_pos = offset; + break; + case 1: + file->f_pos += offset; + break; + default: + return -EINVAL; + } + force_successful_syscall_return(); + return file->f_pos; +} + +static struct file_operations proc_mem_operations = { + .llseek = mem_lseek, + .read = mem_read, + .write = mem_write, + .open = mem_open, +}; + +static ssize_t oom_adjust_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = proc_task(file->f_dentry->d_inode); + char buffer[8]; + size_t len; + int oom_adjust = task->oomkilladj; + loff_t __ppos = *ppos; + + len = sprintf(buffer, "%i\n", oom_adjust); + if (__ppos >= len) + return 0; + if (count > len-__ppos) + count = len-__ppos; + if (copy_to_user(buf, buffer + __ppos, count)) + return -EFAULT; + *ppos = __ppos + count; + return count; +} + +static ssize_t oom_adjust_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = proc_task(file->f_dentry->d_inode); + char buffer[8], *end; + int oom_adjust; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + memset(buffer, 0, 8); + if (count > 6) + count = 6; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + oom_adjust = simple_strtol(buffer, &end, 0); + if (oom_adjust < -16 || oom_adjust > 15) + return -EINVAL; + if (*end == '\n') + end++; + task->oomkilladj = oom_adjust; + if (end - buffer == 0) + return -EIO; + return end - buffer; +} + +static struct file_operations proc_oom_adjust_operations = { + .read = oom_adjust_read, + .write = oom_adjust_write, +}; + +static struct inode_operations proc_mem_inode_operations = { + .permission = proc_permission, +}; + +#ifdef CONFIG_AUDITSYSCALL +#define TMPBUFLEN 21 +static ssize_t proc_loginuid_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + struct task_struct *task = proc_task(inode); + ssize_t length; + char tmpbuf[TMPBUFLEN]; + + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", + audit_get_loginuid(task->audit_context)); + return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); +} + +static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + char *page, *tmp; + ssize_t length; + struct task_struct *task = proc_task(inode); + uid_t loginuid; + + if (!capable(CAP_AUDIT_CONTROL)) + return -EPERM; + + if (current != task) + return -EPERM; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + if (*ppos != 0) { + /* No partial writes. 
*/ + return -EINVAL; + } + page = (char*)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + length = -EFAULT; + if (copy_from_user(page, buf, count)) + goto out_free_page; + + loginuid = simple_strtoul(page, &tmp, 10); + if (tmp == page) { + length = -EINVAL; + goto out_free_page; + + } + length = audit_set_loginuid(task->audit_context, loginuid); + if (likely(length == 0)) + length = count; + +out_free_page: + free_page((unsigned long) page); + return length; +} + +static struct file_operations proc_loginuid_operations = { + .read = proc_loginuid_read, + .write = proc_loginuid_write, +}; +#endif + +#ifdef CONFIG_SECCOMP +static ssize_t seccomp_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *tsk = proc_task(file->f_dentry->d_inode); + char __buf[20]; + loff_t __ppos = *ppos; + size_t len; + + /* no need to print the trailing zero, so use only len */ + len = sprintf(__buf, "%u\n", tsk->seccomp.mode); + if (__ppos >= len) + return 0; + if (count > len - __ppos) + count = len - __ppos; + if (copy_to_user(buf, __buf + __ppos, count)) + return -EFAULT; + *ppos = __ppos + count; + return count; +} + +static ssize_t seccomp_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *tsk = proc_task(file->f_dentry->d_inode); + char __buf[20], *end; + unsigned int seccomp_mode; + + /* can set it only once to be even more secure */ + if (unlikely(tsk->seccomp.mode)) + return -EPERM; + + memset(__buf, 0, sizeof(__buf)); + count = min(count, sizeof(__buf) - 1); + if (copy_from_user(__buf, buf, count)) + return -EFAULT; + seccomp_mode = simple_strtoul(__buf, &end, 0); + if (*end == '\n') + end++; + if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { + tsk->seccomp.mode = seccomp_mode; + set_tsk_thread_flag(tsk, TIF_SECCOMP); + } else + return -EINVAL; + if (unlikely(!(end - __buf))) + return -EIO; + return end - __buf; +} + +static struct file_operations proc_seccomp_operations = { + .read = seccomp_read, + .write = seccomp_write, +}; +#endif /* CONFIG_SECCOMP */ + +static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + int error = -EACCES; + + /* We don't need a base pointer in the /proc filesystem */ + path_release(nd); + + if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) + goto out; + error = proc_check_root(inode); + if (error) + goto out; + + error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); + nd->last_type = LAST_BIND; +out: + return error; +} + +static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, + char __user *buffer, int buflen) +{ + struct inode * inode; + char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; + int len; + + if (!tmp) + return -ENOMEM; + + inode = dentry->d_inode; + path = d_path(dentry, mnt, tmp, PAGE_SIZE); + len = PTR_ERR(path); + if (IS_ERR(path)) + goto out; + len = tmp + PAGE_SIZE - 1 - path; + + if (len > buflen) + len = buflen; + if (copy_to_user(buffer, path, len)) + len = -EFAULT; + out: + free_page((unsigned long)tmp); + return len; +} + +static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) +{ + int error = -EACCES; + struct inode *inode = dentry->d_inode; + struct dentry *de; + struct vfsmount *mnt = NULL; + + lock_kernel(); + + if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) + goto out; + error = proc_check_root(inode); + if (error) + goto out; + + error = 
PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); + if (error) + goto out; + + error = do_proc_readlink(de, mnt, buffer, buflen); + dput(de); + mntput(mnt); +out: + unlock_kernel(); + return error; +} + +static struct inode_operations proc_pid_link_inode_operations = { + .readlink = proc_pid_readlink, + .follow_link = proc_pid_follow_link +}; + +#define NUMBUF 10 + +static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct task_struct *p = proc_task(inode); + unsigned int fd, tid, ino; + int retval; + char buf[NUMBUF]; + struct files_struct * files; + + retval = -ENOENT; + if (!pid_alive(p)) + goto out; + retval = 0; + tid = p->pid; + + fd = filp->f_pos; + switch (fd) { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + case 1: + ino = fake_ino(tid, PROC_TID_INO); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + default: + files = get_files_struct(p); + if (!files) + goto out; + spin_lock(&files->file_lock); + for (fd = filp->f_pos-2; + fd < files->max_fds; + fd++, filp->f_pos++) { + unsigned int i,j; + + if (!fcheck_files(files, fd)) + continue; + spin_unlock(&files->file_lock); + + j = NUMBUF; + i = fd; + do { + j--; + buf[j] = '0' + (i % 10); + i /= 10; + } while (i); + + ino = fake_ino(tid, PROC_TID_FD_DIR + fd); + if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { + spin_lock(&files->file_lock); + break; + } + spin_lock(&files->file_lock); + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } +out: + return retval; +} + +static int proc_pident_readdir(struct file *filp, + void *dirent, filldir_t filldir, + struct pid_entry *ents, unsigned int nents) +{ + int i; + int pid; + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct pid_entry *p; + ino_t ino; + int ret; + + ret = -ENOENT; + if (!pid_alive(proc_task(inode))) + goto out; + + ret = 0; + pid = proc_task(inode)->pid; + i = filp->f_pos; + switch (i) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + if (i >= nents) { + ret = 1; + goto out; + } + p = ents + i; + while (p->name) { + if (filldir(dirent, p->name, p->len, filp->f_pos, + fake_ino(pid, p->type), p->mode >> 12) < 0) + goto out; + filp->f_pos++; + p++; + } + } + + ret = 1; +out: + return ret; +} + +static int proc_tgid_base_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + return proc_pident_readdir(filp,dirent,filldir, + tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); +} + +static int proc_tid_base_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + return proc_pident_readdir(filp,dirent,filldir, + tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); +} + +/* building an inode */ + +static int task_dumpable(struct task_struct *task) +{ + int dumpable = 0; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) + dumpable = mm->dumpable; + task_unlock(task); + return dumpable; +} + + +static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) +{ + struct inode * inode; + struct proc_inode *ei; + + /* We need a new inode */ + + inode = new_inode(sb); + if (!inode) + goto out; + + /* Common stuff */ + ei = 
PROC_I(inode); + ei->task = NULL; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(task->pid, ino); + + if (!pid_alive(task)) + goto out_unlock; + + /* + * grab the reference to task. + */ + get_task_struct(task); + ei->task = task; + ei->type = ino; + inode->i_uid = 0; + inode->i_gid = 0; + if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { + inode->i_uid = task->euid; + inode->i_gid = task->egid; + } + security_task_to_inode(task, inode); + +out: + return inode; + +out_unlock: + ei->pde = NULL; + iput(inode); + return NULL; +} + +/* dentry stuff */ + +/* + * Exceptional case: normally we are not allowed to unhash a busy + * directory. In this case, however, we can do it - no aliasing problems + * due to the way we treat inodes. + * + * Rewrite the inode's ownerships here because the owning task may have + * performed a setuid(), etc. + */ +static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + struct task_struct *task = proc_task(inode); + if (pid_alive(task)) { + if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { + inode->i_uid = task->euid; + inode->i_gid = task->egid; + } else { + inode->i_uid = 0; + inode->i_gid = 0; + } + security_task_to_inode(task, inode); + return 1; + } + d_drop(dentry); + return 0; +} + +static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + struct task_struct *task = proc_task(inode); + int fd = proc_type(inode) - PROC_TID_FD_DIR; + struct files_struct *files; + + files = get_files_struct(task); + if (files) { + spin_lock(&files->file_lock); + if (fcheck_files(files, fd)) { + spin_unlock(&files->file_lock); + put_files_struct(files); + if (task_dumpable(task)) { + inode->i_uid = task->euid; + inode->i_gid = task->egid; + } else { + inode->i_uid = 0; + inode->i_gid = 0; + } + security_task_to_inode(task, inode); + return 1; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + d_drop(dentry); + return 0; +} + +static void pid_base_iput(struct dentry *dentry, struct inode *inode) +{ + struct task_struct *task = proc_task(inode); + spin_lock(&task->proc_lock); + if (task->proc_dentry == dentry) + task->proc_dentry = NULL; + spin_unlock(&task->proc_lock); + iput(inode); +} + +static int pid_delete_dentry(struct dentry * dentry) +{ + /* Is the task we represent dead? + * If so, then don't put the dentry on the lru list, + * kill it immediately. 
+ */ + return !pid_alive(proc_task(dentry->d_inode)); +} + +static struct dentry_operations tid_fd_dentry_operations = +{ + .d_revalidate = tid_fd_revalidate, + .d_delete = pid_delete_dentry, +}; + +static struct dentry_operations pid_dentry_operations = +{ + .d_revalidate = pid_revalidate, + .d_delete = pid_delete_dentry, +}; + +static struct dentry_operations pid_base_dentry_operations = +{ + .d_revalidate = pid_revalidate, + .d_iput = pid_base_iput, + .d_delete = pid_delete_dentry, +}; + +/* Lookups */ + +static unsigned name_to_int(struct dentry *dentry) +{ + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + while (len-- > 0) { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } + return n; +out: + return ~0U; +} + +/* SMP-safe */ +static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +{ + struct task_struct *task = proc_task(dir); + unsigned fd = name_to_int(dentry); + struct file * file; + struct files_struct * files; + struct inode *inode; + struct proc_inode *ei; + + if (fd == ~0U) + goto out; + if (!pid_alive(task)) + goto out; + + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); + if (!inode) + goto out; + ei = PROC_I(inode); + files = get_files_struct(task); + if (!files) + goto out_unlock; + inode->i_mode = S_IFLNK; + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (!file) + goto out_unlock2; + if (file->f_mode & 1) + inode->i_mode |= S_IRUSR | S_IXUSR; + if (file->f_mode & 2) + inode->i_mode |= S_IWUSR | S_IXUSR; + spin_unlock(&files->file_lock); + put_files_struct(files); + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + ei->op.proc_get_link = proc_fd_link; + dentry->d_op = &tid_fd_dentry_operations; + d_add(dentry, inode); + return NULL; + +out_unlock2: + spin_unlock(&files->file_lock); + put_files_struct(files); +out_unlock: + iput(inode); +out: + return ERR_PTR(-ENOENT); +} + +static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); + +static struct file_operations proc_fd_operations = { + .read = generic_read_dir, + .readdir = proc_readfd, +}; + +static struct file_operations proc_task_operations = { + .read = generic_read_dir, + .readdir = proc_task_readdir, +}; + +/* + * proc directories can do almost nothing.. 
+ */ +static struct inode_operations proc_fd_inode_operations = { + .lookup = proc_lookupfd, + .permission = proc_permission, +}; + +static struct inode_operations proc_task_inode_operations = { + .lookup = proc_task_lookup, + .permission = proc_permission, +}; + +#ifdef CONFIG_SECURITY +static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length; + struct task_struct *task = proc_task(inode); + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + length = security_getprocattr(task, + (char*)file->f_dentry->d_name.name, + (void*)page, count); + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); + free_page(page); + return length; +} + +static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + char *page; + ssize_t length; + struct task_struct *task = proc_task(inode); + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + if (*ppos != 0) { + /* No partial writes. */ + return -EINVAL; + } + page = (char*)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + length = -EFAULT; + if (copy_from_user(page, buf, count)) + goto out; + + length = security_setprocattr(task, + (char*)file->f_dentry->d_name.name, + (void*)page, count); +out: + free_page((unsigned long) page); + return length; +} + +static struct file_operations proc_pid_attr_operations = { + .read = proc_pid_attr_read, + .write = proc_pid_attr_write, +}; + +static struct file_operations proc_tid_attr_operations; +static struct inode_operations proc_tid_attr_inode_operations; +static struct file_operations proc_tgid_attr_operations; +static struct inode_operations proc_tgid_attr_inode_operations; +#endif + +/* SMP-safe */ +static struct dentry *proc_pident_lookup(struct inode *dir, + struct dentry *dentry, + struct pid_entry *ents) +{ + struct inode *inode; + int error; + struct task_struct *task = proc_task(dir); + struct pid_entry *p; + struct proc_inode *ei; + + error = -ENOENT; + inode = NULL; + + if (!pid_alive(task)) + goto out; + + for (p = ents; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = -EINVAL; + inode = proc_pid_make_inode(dir->i_sb, task, p->type); + if (!inode) + goto out; + + ei = PROC_I(inode); + inode->i_mode = p->mode; + /* + * Yes, it does not scale. And it should not. Don't add + * new entries into /proc/<tgid>/ without very good reasons. 
+ */ + switch(p->type) { + case PROC_TGID_TASK: + inode->i_nlink = 3; + inode->i_op = &proc_task_inode_operations; + inode->i_fop = &proc_task_operations; + break; + case PROC_TID_FD: + case PROC_TGID_FD: + inode->i_nlink = 2; + inode->i_op = &proc_fd_inode_operations; + inode->i_fop = &proc_fd_operations; + break; + case PROC_TID_EXE: + case PROC_TGID_EXE: + inode->i_op = &proc_pid_link_inode_operations; + ei->op.proc_get_link = proc_exe_link; + break; + case PROC_TID_CWD: + case PROC_TGID_CWD: + inode->i_op = &proc_pid_link_inode_operations; + ei->op.proc_get_link = proc_cwd_link; + break; + case PROC_TID_ROOT: + case PROC_TGID_ROOT: + inode->i_op = &proc_pid_link_inode_operations; + ei->op.proc_get_link = proc_root_link; + break; + case PROC_TID_ENVIRON: + case PROC_TGID_ENVIRON: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_environ; + break; + case PROC_TID_AUXV: + case PROC_TGID_AUXV: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_auxv; + break; + case PROC_TID_STATUS: + case PROC_TGID_STATUS: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_status; + break; + case PROC_TID_STAT: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_tid_stat; + break; + case PROC_TGID_STAT: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_tgid_stat; + break; + case PROC_TID_CMDLINE: + case PROC_TGID_CMDLINE: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_cmdline; + break; + case PROC_TID_STATM: + case PROC_TGID_STATM: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_statm; + break; + case PROC_TID_MAPS: + case PROC_TGID_MAPS: + inode->i_fop = &proc_maps_operations; + break; + case PROC_TID_MEM: + case PROC_TGID_MEM: + inode->i_op = &proc_mem_inode_operations; + inode->i_fop = &proc_mem_operations; + break; +#ifdef CONFIG_SECCOMP + case PROC_TID_SECCOMP: + case PROC_TGID_SECCOMP: + inode->i_fop = &proc_seccomp_operations; + break; +#endif /* CONFIG_SECCOMP */ + case PROC_TID_MOUNTS: + case PROC_TGID_MOUNTS: + inode->i_fop = &proc_mounts_operations; + break; +#ifdef CONFIG_SECURITY + case PROC_TID_ATTR: + inode->i_nlink = 2; + inode->i_op = &proc_tid_attr_inode_operations; + inode->i_fop = &proc_tid_attr_operations; + break; + case PROC_TGID_ATTR: + inode->i_nlink = 2; + inode->i_op = &proc_tgid_attr_inode_operations; + inode->i_fop = &proc_tgid_attr_operations; + break; + case PROC_TID_ATTR_CURRENT: + case PROC_TGID_ATTR_CURRENT: + case PROC_TID_ATTR_PREV: + case PROC_TGID_ATTR_PREV: + case PROC_TID_ATTR_EXEC: + case PROC_TGID_ATTR_EXEC: + case PROC_TID_ATTR_FSCREATE: + case PROC_TGID_ATTR_FSCREATE: + inode->i_fop = &proc_pid_attr_operations; + break; +#endif +#ifdef CONFIG_KALLSYMS + case PROC_TID_WCHAN: + case PROC_TGID_WCHAN: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_wchan; + break; +#endif +#ifdef CONFIG_SCHEDSTATS + case PROC_TID_SCHEDSTAT: + case PROC_TGID_SCHEDSTAT: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_schedstat; + break; +#endif +#ifdef CONFIG_CPUSETS + case PROC_TID_CPUSET: + case PROC_TGID_CPUSET: + inode->i_fop = &proc_cpuset_operations; + break; +#endif + case PROC_TID_OOM_SCORE: + case PROC_TGID_OOM_SCORE: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_oom_score; + break; + case PROC_TID_OOM_ADJUST: + case PROC_TGID_OOM_ADJUST: + inode->i_fop = &proc_oom_adjust_operations; + break; +#ifdef CONFIG_AUDITSYSCALL + case 
PROC_TID_LOGINUID: + case PROC_TGID_LOGINUID: + inode->i_fop = &proc_loginuid_operations; + break; +#endif + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); + return ERR_PTR(-EINVAL); + } + dentry->d_op = &pid_dentry_operations; + d_add(dentry, inode); + return NULL; + +out: + return ERR_PTR(error); +} + +static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ + return proc_pident_lookup(dir, dentry, tgid_base_stuff); +} + +static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ + return proc_pident_lookup(dir, dentry, tid_base_stuff); +} + +static struct file_operations proc_tgid_base_operations = { + .read = generic_read_dir, + .readdir = proc_tgid_base_readdir, +}; + +static struct file_operations proc_tid_base_operations = { + .read = generic_read_dir, + .readdir = proc_tid_base_readdir, +}; + +static struct inode_operations proc_tgid_base_inode_operations = { + .lookup = proc_tgid_base_lookup, +}; + +static struct inode_operations proc_tid_base_inode_operations = { + .lookup = proc_tid_base_lookup, +}; + +#ifdef CONFIG_SECURITY +static int proc_tgid_attr_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + return proc_pident_readdir(filp,dirent,filldir, + tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); +} + +static int proc_tid_attr_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + return proc_pident_readdir(filp,dirent,filldir, + tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); +} + +static struct file_operations proc_tgid_attr_operations = { + .read = generic_read_dir, + .readdir = proc_tgid_attr_readdir, +}; + +static struct file_operations proc_tid_attr_operations = { + .read = generic_read_dir, + .readdir = proc_tid_attr_readdir, +}; + +static struct dentry *proc_tgid_attr_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + return proc_pident_lookup(dir, dentry, tgid_attr_stuff); +} + +static struct dentry *proc_tid_attr_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + return proc_pident_lookup(dir, dentry, tid_attr_stuff); +} + +static struct inode_operations proc_tgid_attr_inode_operations = { + .lookup = proc_tgid_attr_lookup, +}; + +static struct inode_operations proc_tid_attr_inode_operations = { + .lookup = proc_tid_attr_lookup, +}; +#endif + +/* + * /proc/self: + */ +static int proc_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + char tmp[30]; + sprintf(tmp, "%d", current->tgid); + return vfs_readlink(dentry,buffer,buflen,tmp); +} + +static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char tmp[30]; + sprintf(tmp, "%d", current->tgid); + return vfs_follow_link(nd,tmp); +} + +static struct inode_operations proc_self_inode_operations = { + .readlink = proc_self_readlink, + .follow_link = proc_self_follow_link, +}; + +/** + * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache. + * @p: task that should be flushed. + * + * Drops the /proc/<pid> dcache entry from the hash chains. + * + * Dropping /proc/<pid> entries and detach_pid must be synchroneous, + * otherwise e.g. /proc/<pid>/exe might point to the wrong executable, + * if the pid value is immediately reused. 
This is enforced by + * - caller must acquire spin_lock(p->proc_lock) + * - must be called before detach_pid() + * - proc_pid_lookup acquires proc_lock, and checks that + * the target is not dead by looking at the attach count + * of PIDTYPE_PID. + */ + +struct dentry *proc_pid_unhash(struct task_struct *p) +{ + struct dentry *proc_dentry; + + proc_dentry = p->proc_dentry; + if (proc_dentry != NULL) { + + spin_lock(&dcache_lock); + spin_lock(&proc_dentry->d_lock); + if (!d_unhashed(proc_dentry)) { + dget_locked(proc_dentry); + __d_drop(proc_dentry); + spin_unlock(&proc_dentry->d_lock); + } else { + spin_unlock(&proc_dentry->d_lock); + proc_dentry = NULL; + } + spin_unlock(&dcache_lock); + } + return proc_dentry; +} + +/** + * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries + * @proc_entry: directoy to prune. + * + * Shrink the /proc directory that was used by the just killed thread. + */ + +void proc_pid_flush(struct dentry *proc_dentry) +{ + might_sleep(); + if(proc_dentry != NULL) { + shrink_dcache_parent(proc_dentry); + dput(proc_dentry); + } +} + +/* SMP-safe */ +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +{ + struct task_struct *task; + struct inode *inode; + struct proc_inode *ei; + unsigned tgid; + int died; + + if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + ei = PROC_I(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(0, PROC_TGID_INO); + ei->pde = NULL; + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + inode->i_size = 64; + inode->i_op = &proc_self_inode_operations; + d_add(dentry, inode); + return NULL; + } + tgid = name_to_int(dentry); + if (tgid == ~0U) + goto out; + + read_lock(&tasklist_lock); + task = find_task_by_pid(tgid); + if (task) + get_task_struct(task); + read_unlock(&tasklist_lock); + if (!task) + goto out; + + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + + + if (!inode) { + put_task_struct(task); + goto out; + } + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; + inode->i_op = &proc_tgid_base_inode_operations; + inode->i_fop = &proc_tgid_base_operations; + inode->i_nlink = 3; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &pid_base_dentry_operations; + + died = 0; + d_add(dentry, inode); + spin_lock(&task->proc_lock); + task->proc_dentry = dentry; + if (!pid_alive(task)) { + dentry = proc_pid_unhash(task); + died = 1; + } + spin_unlock(&task->proc_lock); + + put_task_struct(task); + if (died) { + proc_pid_flush(dentry); + goto out; + } + return NULL; +out: + return ERR_PTR(-ENOENT); +} + +/* SMP-safe */ +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +{ + struct task_struct *task; + struct task_struct *leader = proc_task(dir); + struct inode *inode; + unsigned tid; + + tid = name_to_int(dentry); + if (tid == ~0U) + goto out; + + read_lock(&tasklist_lock); + task = find_task_by_pid(tid); + if (task) + get_task_struct(task); + read_unlock(&tasklist_lock); + if (!task) + goto out; + if (leader->tgid != task->tgid) + goto out_drop_task; + + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); + + + if (!inode) + goto out_drop_task; + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; + inode->i_op = &proc_tid_base_inode_operations; + inode->i_fop = &proc_tid_base_operations; + inode->i_nlink = 3; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = 
&pid_base_dentry_operations; + + d_add(dentry, inode); + + put_task_struct(task); + return NULL; +out_drop_task: + put_task_struct(task); +out: + return ERR_PTR(-ENOENT); +} + +#define PROC_NUMBUF 10 +#define PROC_MAXPIDS 20 + +/* + * Get a few tgid's to return for filldir - we need to hold the + * tasklist lock while doing this, and we must release it before + * we actually do the filldir itself, so we use a temp buffer.. + */ +static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) +{ + struct task_struct *p; + int nr_tgids = 0; + + index--; + read_lock(&tasklist_lock); + p = NULL; + if (version) { + p = find_task_by_pid(version); + if (p && !thread_group_leader(p)) + p = NULL; + } + + if (p) + index = 0; + else + p = next_task(&init_task); + + for ( ; p != &init_task; p = next_task(p)) { + int tgid = p->pid; + if (!pid_alive(p)) + continue; + if (--index >= 0) + continue; + tgids[nr_tgids] = tgid; + nr_tgids++; + if (nr_tgids >= PROC_MAXPIDS) + break; + } + read_unlock(&tasklist_lock); + return nr_tgids; +} + +/* + * Get a few tid's to return for filldir - we need to hold the + * tasklist lock while doing this, and we must release it before + * we actually do the filldir itself, so we use a temp buffer.. + */ +static int get_tid_list(int index, unsigned int *tids, struct inode *dir) +{ + struct task_struct *leader_task = proc_task(dir); + struct task_struct *task = leader_task; + int nr_tids = 0; + + index -= 2; + read_lock(&tasklist_lock); + /* + * The starting point task (leader_task) might be an already + * unlinked task, which cannot be used to access the task-list + * via next_thread(). + */ + if (pid_alive(task)) do { + int tid = task->pid; + + if (--index >= 0) + continue; + tids[nr_tids] = tid; + nr_tids++; + if (nr_tids >= PROC_MAXPIDS) + break; + } while ((task = next_thread(task)) != leader_task); + read_unlock(&tasklist_lock); + return nr_tids; +} + +/* for the /proc/ directory itself, after non-process stuff has been done */ +int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + unsigned int tgid_array[PROC_MAXPIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; + unsigned int nr_tgids, i; + int next_tgid; + + if (!nr) { + ino_t ino = fake_ino(0,PROC_TGID_INO); + if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + nr++; + } + + /* f_version caches the tgid value that the last readdir call couldn't + * return. lseek aka telldir automagically resets f_version to 0. + */ + next_tgid = filp->f_version; + filp->f_version = 0; + for (;;) { + nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); + if (!nr_tgids) { + /* no more entries ! 
*/ + break; + } + next_tgid = 0; + + /* do not use the last found pid, reserve it for next_tgid */ + if (nr_tgids == PROC_MAXPIDS) { + nr_tgids--; + next_tgid = tgid_array[nr_tgids]; + } + + for (i=0;i<nr_tgids;i++) { + int tgid = tgid_array[i]; + ino_t ino = fake_ino(tgid,PROC_TGID_INO); + unsigned long j = PROC_NUMBUF; + + do + buf[--j] = '0' + (tgid % 10); + while ((tgid /= 10) != 0); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { + /* returning this tgid failed, save it as the first + * pid for the next readir call */ + filp->f_version = tgid_array[i]; + goto out; + } + filp->f_pos++; + nr++; + } + } +out: + return 0; +} + +/* for the /proc/TGID/task/ directories */ +static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + unsigned int tid_array[PROC_MAXPIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr_tids, i; + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + int retval = -ENOENT; + ino_t ino; + unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ + + if (!pid_alive(proc_task(inode))) + goto out; + retval = 0; + + switch (pos) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) + goto out; + pos++; + /* fall through */ + } + + nr_tids = get_tid_list(pos, tid_array, inode); + + for (i = 0; i < nr_tids; i++) { + unsigned long j = PROC_NUMBUF; + int tid = tid_array[i]; + + ino = fake_ino(tid,PROC_TID_INO); + + do + buf[--j] = '0' + (tid % 10); + while ((tid /= 10) != 0); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) + break; + pos++; + } +out: + filp->f_pos = pos; + return retval; +} diff --git a/fs/proc/generic.c b/fs/proc/generic.c new file mode 100644 index 0000000..6c6315d --- /dev/null +++ b/fs/proc/generic.c @@ -0,0 +1,705 @@ +/* + * proc/fs/generic.c --- generic routines for the proc-fs + * + * This file contains generic proc-fs routines for handling + * directories and files. + * + * Copyright (C) 1991, 1992 Linus Torvalds. 
+ * Copyright (C) 1997 Theodore Ts'o + */ + +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/idr.h> +#include <linux/namei.h> +#include <linux/bitops.h> +#include <asm/uaccess.h> + +static ssize_t proc_file_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos); +static ssize_t proc_file_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos); +static loff_t proc_file_lseek(struct file *, loff_t, int); + +int proc_match(int len, const char *name, struct proc_dir_entry *de) +{ + if (de->namelen != len) + return 0; + return !memcmp(name, de->name, len); +} + +static struct file_operations proc_file_operations = { + .llseek = proc_file_lseek, + .read = proc_file_read, + .write = proc_file_write, +}; + +/* buffer size is one page but our output routines use some slack for overruns */ +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) + +static ssize_t +proc_file_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + char *page; + ssize_t retval=0; + int eof=0; + ssize_t n, count; + char *start; + struct proc_dir_entry * dp; + + dp = PDE(inode); + if (!(page = (char*) __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + while ((nbytes > 0) && !eof) { + count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); + + start = NULL; + if (dp->get_info) { + /* Handle old net routines */ + n = dp->get_info(page, &start, *ppos, count); + if (n < count) + eof = 1; + } else if (dp->read_proc) { + /* + * How to be a proc read function + * ------------------------------ + * Prototype: + * int f(char *buffer, char **start, off_t offset, + * int count, int *peof, void *dat) + * + * Assume that the buffer is "count" bytes in size. + * + * If you know you have supplied all the data you + * have, set *peof. + * + * You have three ways to return data: + * 0) Leave *start = NULL. (This is the default.) + * Put the data of the requested offset at that + * offset within the buffer. Return the number (n) + * of bytes there are from the beginning of the + * buffer up to the last byte of data. If the + * number of supplied bytes (= n - offset) is + * greater than zero and you didn't signal eof + * and the reader is prepared to take more data + * you will be called again with the requested + * offset advanced by the number of bytes + * absorbed. This interface is useful for files + * no larger than the buffer. + * 1) Set *start = an unsigned long value less than + * the buffer address but greater than zero. + * Put the data of the requested offset at the + * beginning of the buffer. Return the number of + * bytes of data placed there. If this number is + * greater than zero and you didn't signal eof + * and the reader is prepared to take more data + * you will be called again with the requested + * offset advanced by *start. This interface is + * useful when you have a large file consisting + * of a series of blocks which you want to count + * and return as wholes. + * (Hack by Paul.Russell@rustcorp.com.au) + * 2) Set *start = an address within the buffer. + * Put the data of the requested offset at *start. + * Return the number of bytes of data placed there. 
+ * If this number is greater than zero and you + * didn't signal eof and the reader is prepared to + * take more data you will be called again with the + * requested offset advanced by the number of bytes + * absorbed. + */ + n = dp->read_proc(page, &start, *ppos, + count, &eof, dp->data); + } else + break; + + if (n == 0) /* end of file */ + break; + if (n < 0) { /* error */ + if (retval == 0) + retval = n; + break; + } + + if (start == NULL) { + if (n > PAGE_SIZE) { + printk(KERN_ERR + "proc_file_read: Apparent buffer overflow!\n"); + n = PAGE_SIZE; + } + n -= *ppos; + if (n <= 0) + break; + if (n > count) + n = count; + start = page + *ppos; + } else if (start < page) { + if (n > PAGE_SIZE) { + printk(KERN_ERR + "proc_file_read: Apparent buffer overflow!\n"); + n = PAGE_SIZE; + } + if (n > count) { + /* + * Don't reduce n because doing so might + * cut off part of a data block. + */ + printk(KERN_WARNING + "proc_file_read: Read count exceeded\n"); + } + } else /* start >= page */ { + unsigned long startoff = (unsigned long)(start - page); + if (n > (PAGE_SIZE - startoff)) { + printk(KERN_ERR + "proc_file_read: Apparent buffer overflow!\n"); + n = PAGE_SIZE - startoff; + } + if (n > count) + n = count; + } + + n -= copy_to_user(buf, start < page ? page : start, n); + if (n == 0) { + if (retval == 0) + retval = -EFAULT; + break; + } + + *ppos += start < page ? (unsigned long)start : n; + nbytes -= n; + buf += n; + retval += n; + } + free_page((unsigned long) page); + return retval; +} + +static ssize_t +proc_file_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct proc_dir_entry * dp; + + dp = PDE(inode); + + if (!dp->write_proc) + return -EIO; + + /* FIXME: does this routine need ppos? probably... */ + return dp->write_proc(file, buffer, count, dp->data); +} + + +static loff_t +proc_file_lseek(struct file *file, loff_t offset, int orig) +{ + lock_kernel(); + + switch (orig) { + case 0: + if (offset < 0) + goto out; + file->f_pos = offset; + unlock_kernel(); + return(file->f_pos); + case 1: + if (offset + file->f_pos < 0) + goto out; + file->f_pos += offset; + unlock_kernel(); + return(file->f_pos); + case 2: + goto out; + default: + goto out; + } + +out: + unlock_kernel(); + return -EINVAL; +} + +static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = dentry->d_inode; + struct proc_dir_entry *de = PDE(inode); + int error; + + error = inode_change_ok(inode, iattr); + if (error) + goto out; + + error = inode_setattr(inode, iattr); + if (error) + goto out; + + de->uid = inode->i_uid; + de->gid = inode->i_gid; + de->mode = inode->i_mode; +out: + return error; +} + +static struct inode_operations proc_file_inode_operations = { + .setattr = proc_notify_change, +}; + +/* + * This function parses a name such as "tty/driver/serial", and + * returns the struct proc_dir_entry for "/proc/tty/driver", and + * returns "serial" in residual. 
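+ *
+ * For illustration: a call such as
+ *
+ *	create_proc_entry("tty/driver/serial", 0, NULL)
+ *
+ * reaches this function (via proc_create) with name == "tty/driver/serial";
+ * on success *ret points at the proc_dir_entry for /proc/tty/driver and
+ * *residual points at the trailing component "serial".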
+ */ +static int xlate_proc_name(const char *name, + struct proc_dir_entry **ret, const char **residual) +{ + const char *cp = name, *next; + struct proc_dir_entry *de; + int len; + + de = &proc_root; + while (1) { + next = strchr(cp, '/'); + if (!next) + break; + + len = next - cp; + for (de = de->subdir; de ; de = de->next) { + if (proc_match(len, cp, de)) + break; + } + if (!de) + return -ENOENT; + cp += len + 1; + } + *residual = cp; + *ret = de; + return 0; +} + +static DEFINE_IDR(proc_inum_idr); +static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ + +#define PROC_DYNAMIC_FIRST 0xF0000000UL + +/* + * Return an inode number between PROC_DYNAMIC_FIRST and + * 0xffffffff, or zero on failure. + */ +static unsigned int get_inode_number(void) +{ + int i, inum = 0; + int error; + +retry: + if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) + return 0; + + spin_lock(&proc_inum_lock); + error = idr_get_new(&proc_inum_idr, NULL, &i); + spin_unlock(&proc_inum_lock); + if (error == -EAGAIN) + goto retry; + else if (error) + return 0; + + inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; + + /* inum will never be more than 0xf0ffffff, so no check + * for overflow. + */ + + return inum; +} + +static void release_inode_number(unsigned int inum) +{ + int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; + + spin_lock(&proc_inum_lock); + idr_remove(&proc_inum_idr, id); + spin_unlock(&proc_inum_lock); +} + +static int proc_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + nd_set_link(nd, PDE(dentry->d_inode)->data); + return 0; +} + +static struct inode_operations proc_link_inode_operations = { + .readlink = generic_readlink, + .follow_link = proc_follow_link, +}; + +/* + * As some entries in /proc are volatile, we want to + * get rid of unused dentries. This could be made + * smarter: we could keep a "volatile" flag in the + * inode to indicate which ones to keep. + */ +static int proc_delete_dentry(struct dentry * dentry) +{ + return 1; +} + +static struct dentry_operations proc_dentry_operations = +{ + .d_delete = proc_delete_dentry, +}; + +/* + * Don't create negative dentries here, return -ENOENT by hand + * instead. + */ +struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = NULL; + struct proc_dir_entry * de; + int error = -ENOENT; + + lock_kernel(); + de = PDE(dir); + if (de) { + for (de = de->subdir; de ; de = de->next) { + if (de->namelen != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { + unsigned int ino = de->low_ino; + + error = -EINVAL; + inode = proc_get_inode(dir->i_sb, ino, de); + break; + } + } + } + unlock_kernel(); + + if (inode) { + dentry->d_op = &proc_dentry_operations; + d_add(dentry, inode); + return NULL; + } + return ERR_PTR(error); +} + +/* + * This returns non-zero if at EOF, so that the /proc + * root directory can use this and check if it should + * continue with the <pid> entries.. + * + * Note that the VFS-layer doesn't care about the return + * value of the readdir() call, as long as it's non-negative + * for success.. 
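+ *
+ * A minimal sketch of the expected caller in fs/proc/root.c (illustration
+ * only, not the verbatim proc_root_readdir):
+ *
+ *	if (filp->f_pos < FIRST_PROCESS_ENTRY) {
+ *		int error = proc_readdir(filp, dirent, filldir);
+ *		if (error <= 0)
+ *			return error;
+ *		filp->f_pos = FIRST_PROCESS_ENTRY;
+ *	}
+ *	return proc_pid_readdir(filp, dirent, filldir);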
+ */ +int proc_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + struct proc_dir_entry * de; + unsigned int ino; + int i; + struct inode *inode = filp->f_dentry->d_inode; + int ret = 0; + + lock_kernel(); + + ino = inode->i_ino; + de = PDE(inode); + if (!de) { + ret = -EINVAL; + goto out; + } + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, i, + parent_ino(filp->f_dentry), + DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + default: + de = de->subdir; + i -= 2; + for (;;) { + if (!de) { + ret = 1; + goto out; + } + if (!i) + break; + de = de->next; + i--; + } + + do { + if (filldir(dirent, de->name, de->namelen, filp->f_pos, + de->low_ino, de->mode >> 12) < 0) + goto out; + filp->f_pos++; + de = de->next; + } while (de); + } + ret = 1; +out: unlock_kernel(); + return ret; +} + +/* + * These are the generic /proc directory operations. They + * use the in-memory "struct proc_dir_entry" tree to parse + * the /proc directory. + */ +static struct file_operations proc_dir_operations = { + .read = generic_read_dir, + .readdir = proc_readdir, +}; + +/* + * proc directories can do almost nothing.. + */ +static struct inode_operations proc_dir_inode_operations = { + .lookup = proc_lookup, + .setattr = proc_notify_change, +}; + +static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +{ + unsigned int i; + + i = get_inode_number(); + if (i == 0) + return -EAGAIN; + dp->low_ino = i; + dp->next = dir->subdir; + dp->parent = dir; + dir->subdir = dp; + if (S_ISDIR(dp->mode)) { + if (dp->proc_iops == NULL) { + dp->proc_fops = &proc_dir_operations; + dp->proc_iops = &proc_dir_inode_operations; + } + dir->nlink++; + } else if (S_ISLNK(dp->mode)) { + if (dp->proc_iops == NULL) + dp->proc_iops = &proc_link_inode_operations; + } else if (S_ISREG(dp->mode)) { + if (dp->proc_fops == NULL) + dp->proc_fops = &proc_file_operations; + if (dp->proc_iops == NULL) + dp->proc_iops = &proc_file_inode_operations; + } + return 0; +} + +/* + * Kill an inode that got unregistered.. + */ +static void proc_kill_inodes(struct proc_dir_entry *de) +{ + struct list_head *p; + struct super_block *sb = proc_mnt->mnt_sb; + + /* + * Actually it's a partial revoke(). 
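+ * "Partial" because files that are already open are not forcibly closed:
+ * their ->f_op is merely cleared below, so later read()/write() calls on
+ * them fail in the VFS instead of calling into an entry (or module) that
+ * is going away.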
+ */ + file_list_lock(); + list_for_each(p, &sb->s_files) { + struct file * filp = list_entry(p, struct file, f_list); + struct dentry * dentry = filp->f_dentry; + struct inode * inode; + struct file_operations *fops; + + if (dentry->d_op != &proc_dentry_operations) + continue; + inode = dentry->d_inode; + if (PDE(inode) != de) + continue; + fops = filp->f_op; + filp->f_op = NULL; + fops_put(fops); + } + file_list_unlock(); +} + +static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, + const char *name, + mode_t mode, + nlink_t nlink) +{ + struct proc_dir_entry *ent = NULL; + const char *fn = name; + int len; + + /* make sure name is valid */ + if (!name || !strlen(name)) goto out; + + if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) + goto out; + + /* At this point there must not be any '/' characters beyond *fn */ + if (strchr(fn, '/')) + goto out; + + len = strlen(fn); + + ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); + if (!ent) goto out; + + memset(ent, 0, sizeof(struct proc_dir_entry)); + memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); + ent->name = ((char *) ent) + sizeof(*ent); + ent->namelen = len; + ent->mode = mode; + ent->nlink = nlink; + out: + return ent; +} + +struct proc_dir_entry *proc_symlink(const char *name, + struct proc_dir_entry *parent, const char *dest) +{ + struct proc_dir_entry *ent; + + ent = proc_create(&parent,name, + (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); + + if (ent) { + ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); + if (ent->data) { + strcpy((char*)ent->data,dest); + if (proc_register(parent, ent) < 0) { + kfree(ent->data); + kfree(ent); + ent = NULL; + } + } else { + kfree(ent); + ent = NULL; + } + } + return ent; +} + +struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, + struct proc_dir_entry *parent) +{ + struct proc_dir_entry *ent; + + ent = proc_create(&parent, name, S_IFDIR | mode, 2); + if (ent) { + ent->proc_fops = &proc_dir_operations; + ent->proc_iops = &proc_dir_inode_operations; + + if (proc_register(parent, ent) < 0) { + kfree(ent); + ent = NULL; + } + } + return ent; +} + +struct proc_dir_entry *proc_mkdir(const char *name, + struct proc_dir_entry *parent) +{ + return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); +} + +struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, + struct proc_dir_entry *parent) +{ + struct proc_dir_entry *ent; + nlink_t nlink; + + if (S_ISDIR(mode)) { + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO | S_IXUGO; + nlink = 2; + } else { + if ((mode & S_IFMT) == 0) + mode |= S_IFREG; + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO; + nlink = 1; + } + + ent = proc_create(&parent,name,mode,nlink); + if (ent) { + if (S_ISDIR(mode)) { + ent->proc_fops = &proc_dir_operations; + ent->proc_iops = &proc_dir_inode_operations; + } + if (proc_register(parent, ent) < 0) { + kfree(ent); + ent = NULL; + } + } + return ent; +} + +void free_proc_entry(struct proc_dir_entry *de) +{ + unsigned int ino = de->low_ino; + + if (ino < PROC_DYNAMIC_FIRST) + return; + + release_inode_number(ino); + + if (S_ISLNK(de->mode) && de->data) + kfree(de->data); + kfree(de); +} + +/* + * Remove a /proc entry and free it if it's not currently in use. + * If it is in use, we set the 'deleted' flag. 
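+ *
+ * A typical create/remove pairing (hypothetical module code, shown only
+ * as an illustration of the API):
+ *
+ *	static int __init example_init(void)
+ *	{
+ *		struct proc_dir_entry *ent;
+ *
+ *		ent = create_proc_entry("example", S_IRUGO, NULL);
+ *		if (!ent)
+ *			return -ENOMEM;
+ *		ent->read_proc = example_read_proc;
+ *		ent->owner = THIS_MODULE;
+ *		return 0;
+ *	}
+ *
+ *	static void __exit example_exit(void)
+ *	{
+ *		remove_proc_entry("example", NULL);
+ *	}
+ *
+ * (example_read_proc being any handler that follows the read_proc
+ * convention documented in generic.c.)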
+ */ +void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry **p; + struct proc_dir_entry *de; + const char *fn = name; + int len; + + if (!parent && xlate_proc_name(name, &parent, &fn) != 0) + goto out; + len = strlen(fn); + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (!proc_match(len, fn, *p)) + continue; + de = *p; + *p = de->next; + de->next = NULL; + if (S_ISDIR(de->mode)) + parent->nlink--; + proc_kill_inodes(de); + de->nlink = 0; + WARN_ON(de->subdir); + if (!atomic_read(&de->count)) + free_proc_entry(de); + else { + de->deleted = 1; + printk("remove_proc_entry: %s/%s busy, count=%d\n", + parent->name, de->name, atomic_read(&de->count)); + } + break; + } +out: + return; +} diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt new file mode 100644 index 0000000..77212f9 --- /dev/null +++ b/fs/proc/inode-alloc.txt @@ -0,0 +1,14 @@ +Current inode allocations in the proc-fs (hex-numbers): + + 00000000 reserved + 00000001-00000fff static entries (goners) + 001 root-ino + + 00001000-00001fff unused + 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff + 80000000-efffffff unused + f0000000-ffffffff dynamic entries + +Goal: + a) once we'll split the thing into several virtual filesystems we + will get rid of magical ranges (and this file, BTW). diff --git a/fs/proc/inode.c b/fs/proc/inode.c new file mode 100644 index 0000000..133c286 --- /dev/null +++ b/fs/proc/inode.c @@ -0,0 +1,218 @@ +/* + * linux/fs/proc/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/file.h> +#include <linux/limits.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/smp_lock.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +extern void free_proc_entry(struct proc_dir_entry *); + +static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) +{ + if (de) + atomic_inc(&de->count); + return de; +} + +/* + * Decrements the use count and checks for deferred deletion. + */ +static void de_put(struct proc_dir_entry *de) +{ + if (de) { + lock_kernel(); + if (!atomic_read(&de->count)) { + printk("de_put: entry %s already free!\n", de->name); + unlock_kernel(); + return; + } + + if (atomic_dec_and_test(&de->count)) { + if (de->deleted) { + printk("de_put: deferred delete of %s\n", + de->name); + free_proc_entry(de); + } + } + unlock_kernel(); + } +} + +/* + * Decrement the use count of the proc_dir_entry. 
+ */ +static void proc_delete_inode(struct inode *inode) +{ + struct proc_dir_entry *de; + struct task_struct *tsk; + + /* Let go of any associated process */ + tsk = PROC_I(inode)->task; + if (tsk) + put_task_struct(tsk); + + /* Let go of any associated proc directory entry */ + de = PROC_I(inode)->pde; + if (de) { + if (de->owner) + module_put(de->owner); + de_put(de); + } + clear_inode(inode); +} + +struct vfsmount *proc_mnt; + +static void proc_read_inode(struct inode * inode) +{ + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; +} + +static kmem_cache_t * proc_inode_cachep; + +static struct inode *proc_alloc_inode(struct super_block *sb) +{ + struct proc_inode *ei; + struct inode *inode; + + ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); + if (!ei) + return NULL; + ei->task = NULL; + ei->type = 0; + ei->op.proc_get_link = NULL; + ei->pde = NULL; + inode = &ei->vfs_inode; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + return inode; +} + +static void proc_destroy_inode(struct inode *inode) +{ + kmem_cache_free(proc_inode_cachep, PROC_I(inode)); +} + +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ + struct proc_inode *ei = (struct proc_inode *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(&ei->vfs_inode); +} + +int __init proc_init_inodecache(void) +{ + proc_inode_cachep = kmem_cache_create("proc_inode_cache", + sizeof(struct proc_inode), + 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + if (proc_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static int proc_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + +static struct super_operations proc_sops = { + .alloc_inode = proc_alloc_inode, + .destroy_inode = proc_destroy_inode, + .read_inode = proc_read_inode, + .drop_inode = generic_delete_inode, + .delete_inode = proc_delete_inode, + .statfs = simple_statfs, + .remount_fs = proc_remount, +}; + +struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, + struct proc_dir_entry *de) +{ + struct inode * inode; + + /* + * Increment the use count so the dir entry can't disappear. 
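+ * The reference taken here is dropped either on the out_fail path below
+ * or, in the normal case, by de_put() from proc_delete_inode() once the
+ * inode itself is discarded.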
+ */ + de_get(de); + + WARN_ON(de && de->deleted); + + inode = iget(sb, ino); + if (!inode) + goto out_fail; + + PROC_I(inode)->pde = de; + if (de) { + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; + if (!try_module_get(de->owner)) + goto out_fail; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) + inode->i_fop = de->proc_fops; + } + +out: + return inode; + +out_fail: + de_put(de); + goto out; +} + +int proc_fill_super(struct super_block *s, void *data, int silent) +{ + struct inode * root_inode; + + s->s_flags |= MS_NODIRATIME; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = PROC_SUPER_MAGIC; + s->s_op = &proc_sops; + s->s_time_gran = 1; + + root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); + if (!root_inode) + goto out_no_root; + /* + * Fixup the root inode's nlink value + */ + root_inode->i_nlink += nr_processes(); + root_inode->i_uid = 0; + root_inode->i_gid = 0; + s->s_root = d_alloc_root(root_inode); + if (!s->s_root) + goto out_no_root; + return 0; + +out_no_root: + printk("proc_read_super: get root inode failed\n"); + iput(root_inode); + return -ENOMEM; +} +MODULE_LICENSE("GPL"); diff --git a/fs/proc/internal.h b/fs/proc/internal.h new file mode 100644 index 0000000..3e55198 --- /dev/null +++ b/fs/proc/internal.h @@ -0,0 +1,48 @@ +/* internal.h: internal procfs definitions + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/proc_fs.h> + +struct vmalloc_info { + unsigned long used; + unsigned long largest_chunk; +}; + +#ifdef CONFIG_MMU +#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) +extern void get_vmalloc_info(struct vmalloc_info *vmi); +#else + +#define VMALLOC_TOTAL 0UL +#define get_vmalloc_info(vmi) \ +do { \ + (vmi)->used = 0; \ + (vmi)->largest_chunk = 0; \ +} while(0) + +#endif + +extern void create_seq_entry(char *name, mode_t mode, struct file_operations *f); +extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **); +extern int proc_tid_stat(struct task_struct *, char *); +extern int proc_tgid_stat(struct task_struct *, char *); +extern int proc_pid_status(struct task_struct *, char *); +extern int proc_pid_statm(struct task_struct *, char *); + +static inline struct task_struct *proc_task(struct inode *inode) +{ + return PROC_I(inode)->task; +} + +static inline int proc_type(struct inode *inode) +{ + return PROC_I(inode)->type; +} diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c new file mode 100644 index 0000000..1c7da98 --- /dev/null +++ b/fs/proc/kcore.c @@ -0,0 +1,404 @@ +/* + * fs/proc/kcore.c kernel ELF core dumper + * + * Modelled on fs/exec.c:aout_core_dump() + * Jeremy Fitzhardinge <jeremy@sw.oz.au> + * ELF version written by David Howells <David.Howells@nexor.co.uk> + * Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com> + * Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com> + * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com> + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/elf.h> +#include <linux/elfcore.h> +#include <linux/vmalloc.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <asm/uaccess.h> +#include <asm/io.h> + + +static int open_kcore(struct inode * inode, struct file * filp) +{ + return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; +} + +static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); + +struct file_operations proc_kcore_operations = { + .read = read_kcore, + .open = open_kcore, +}; + +#ifndef kc_vaddr_to_offset +#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET) +#endif +#ifndef kc_offset_to_vaddr +#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) +#endif + +#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) + +/* An ELF note in memory */ +struct memelfnote +{ + const char *name; + int type; + unsigned int datasz; + void *data; +}; + +static struct kcore_list *kclist; +static DEFINE_RWLOCK(kclist_lock); + +void +kclist_add(struct kcore_list *new, void *addr, size_t size) +{ + new->addr = (unsigned long)addr; + new->size = size; + + write_lock(&kclist_lock); + new->next = kclist; + kclist = new; + write_unlock(&kclist_lock); +} + +static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) +{ + size_t try, size; + struct kcore_list *m; + + *nphdr = 1; /* PT_NOTE */ + size = 0; + + for (m=kclist; m; m=m->next) { + try = kc_vaddr_to_offset((size_t)m->addr + m->size); + if (try > size) + size = try; + *nphdr = *nphdr + 1; + } + *elf_buflen = sizeof(struct elfhdr) + + (*nphdr + 2)*sizeof(struct elf_phdr) + + 3 * (sizeof(struct elf_note) + 4) + + sizeof(struct elf_prstatus) + + sizeof(struct elf_prpsinfo) + + sizeof(struct task_struct); + *elf_buflen = PAGE_ALIGN(*elf_buflen); + return size + *elf_buflen; +} + + +/*****************************************************************************/ +/* + * determine size of ELF note + */ +static int notesize(struct memelfnote *en) +{ + int sz; + + sz = sizeof(struct elf_note); + sz += roundup(strlen(en->name), 4); + sz += roundup(en->datasz, 4); + + return sz; +} /* end notesize() */ + +/*****************************************************************************/ +/* + * store a note in the header buffer + */ +static char *storenote(struct memelfnote *men, char *bufp) +{ + struct elf_note en; + +#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0) + + en.n_namesz = strlen(men->name); + en.n_descsz = men->datasz; + en.n_type = men->type; + + DUMP_WRITE(&en, sizeof(en)); + DUMP_WRITE(men->name, en.n_namesz); + + /* XXX - cast from long long to long to avoid need for libgcc.a */ + bufp = (char*) roundup((unsigned long)bufp,4); + DUMP_WRITE(men->data, men->datasz); + bufp = (char*) roundup((unsigned long)bufp,4); + +#undef DUMP_WRITE + + return bufp; +} /* end storenote() */ + +/* + * store an ELF coredump header in the supplied buffer + * nphdr is the number of elf_phdr to insert + */ +static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) +{ + struct elf_prstatus prstatus; /* NT_PRSTATUS */ + struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */ + struct elf_phdr *nhdr, *phdr; + struct elfhdr *elf; + struct memelfnote notes[3]; + off_t offset = 0; + struct kcore_list *m; + + /* setup ELF header */ + elf = (struct elfhdr *) bufp; + bufp += sizeof(struct elfhdr); + offset += sizeof(struct elfhdr); + memcpy(elf->e_ident, ELFMAG, SELFMAG); + elf->e_ident[EI_CLASS] = ELF_CLASS; + elf->e_ident[EI_DATA] = ELF_DATA; + elf->e_ident[EI_VERSION]= EV_CURRENT; + elf->e_ident[EI_OSABI] = ELF_OSABI; + memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); + elf->e_type = ET_CORE; + elf->e_machine = ELF_ARCH; + elf->e_version = EV_CURRENT; + elf->e_entry = 0; + elf->e_phoff = sizeof(struct elfhdr); + elf->e_shoff = 0; +#if defined(CONFIG_H8300) + elf->e_flags = ELF_FLAGS; +#else + elf->e_flags = 0; +#endif + elf->e_ehsize = 
sizeof(struct elfhdr); + elf->e_phentsize= sizeof(struct elf_phdr); + elf->e_phnum = nphdr; + elf->e_shentsize= 0; + elf->e_shnum = 0; + elf->e_shstrndx = 0; + + /* setup ELF PT_NOTE program header */ + nhdr = (struct elf_phdr *) bufp; + bufp += sizeof(struct elf_phdr); + offset += sizeof(struct elf_phdr); + nhdr->p_type = PT_NOTE; + nhdr->p_offset = 0; + nhdr->p_vaddr = 0; + nhdr->p_paddr = 0; + nhdr->p_filesz = 0; + nhdr->p_memsz = 0; + nhdr->p_flags = 0; + nhdr->p_align = 0; + + /* setup ELF PT_LOAD program header for every area */ + for (m=kclist; m; m=m->next) { + phdr = (struct elf_phdr *) bufp; + bufp += sizeof(struct elf_phdr); + offset += sizeof(struct elf_phdr); + + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff; + phdr->p_vaddr = (size_t)m->addr; + phdr->p_paddr = 0; + phdr->p_filesz = phdr->p_memsz = m->size; + phdr->p_align = PAGE_SIZE; + } + + /* + * Set up the notes in similar form to SVR4 core dumps made + * with info from their /proc. + */ + nhdr->p_offset = offset; + + /* set up the process status */ + notes[0].name = "CORE"; + notes[0].type = NT_PRSTATUS; + notes[0].datasz = sizeof(struct elf_prstatus); + notes[0].data = &prstatus; + + memset(&prstatus, 0, sizeof(struct elf_prstatus)); + + nhdr->p_filesz = notesize(¬es[0]); + bufp = storenote(¬es[0], bufp); + + /* set up the process info */ + notes[1].name = "CORE"; + notes[1].type = NT_PRPSINFO; + notes[1].datasz = sizeof(struct elf_prpsinfo); + notes[1].data = &prpsinfo; + + memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo)); + prpsinfo.pr_state = 0; + prpsinfo.pr_sname = 'R'; + prpsinfo.pr_zomb = 0; + + strcpy(prpsinfo.pr_fname, "vmlinux"); + strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); + + nhdr->p_filesz += notesize(¬es[1]); + bufp = storenote(¬es[1], bufp); + + /* set up the task structure */ + notes[2].name = "CORE"; + notes[2].type = NT_TASKSTRUCT; + notes[2].datasz = sizeof(struct task_struct); + notes[2].data = current; + + nhdr->p_filesz += notesize(¬es[2]); + bufp = storenote(¬es[2], bufp); + +} /* end elf_kcore_store_hdr() */ + +/*****************************************************************************/ +/* + * read from the ELF header and then kernel memory + */ +static ssize_t +read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) +{ + ssize_t acc = 0; + size_t size, tsz; + size_t elf_buflen; + int nphdr; + unsigned long start; + + read_lock(&kclist_lock); + proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen); + if (buflen == 0 || *fpos >= size) { + read_unlock(&kclist_lock); + return 0; + } + + /* trim buflen to not go beyond EOF */ + if (buflen > size - *fpos) + buflen = size - *fpos; + + /* construct an ELF core header if we'll need some of it */ + if (*fpos < elf_buflen) { + char * elf_buf; + + tsz = elf_buflen - *fpos; + if (buflen < tsz) + tsz = buflen; + elf_buf = kmalloc(elf_buflen, GFP_ATOMIC); + if (!elf_buf) { + read_unlock(&kclist_lock); + return -ENOMEM; + } + memset(elf_buf, 0, elf_buflen); + elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen); + read_unlock(&kclist_lock); + if (copy_to_user(buffer, elf_buf + *fpos, tsz)) { + kfree(elf_buf); + return -EFAULT; + } + kfree(elf_buf); + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + + /* leave now if filled buffer already */ + if (buflen == 0) + return acc; + } else + read_unlock(&kclist_lock); + + /* + * Check to see if our file offset matches with any of + * the addresses in the elf_phdr on our list. 
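+ * With the default kc_vaddr_to_offset()/kc_offset_to_vaddr() macros the
+ * mapping is linear: file offset == elf_buflen + (vaddr - PAGE_OFFSET).
+ * As an example (assuming the common i386 PAGE_OFFSET of 0xc0000000),
+ * the byte at kernel virtual address 0xc0100000 shows up in /proc/kcore
+ * at offset elf_buflen + 0x100000.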
+ */ + start = kc_offset_to_vaddr(*fpos - elf_buflen); + if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) + tsz = buflen; + + while (buflen) { + struct kcore_list *m; + + read_lock(&kclist_lock); + for (m=kclist; m; m=m->next) { + if (start >= m->addr && start < (m->addr+m->size)) + break; + } + read_unlock(&kclist_lock); + + if (m == NULL) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) { + char * elf_buf; + struct vm_struct *m; + unsigned long curstart = start; + unsigned long cursize = tsz; + + elf_buf = kmalloc(tsz, GFP_KERNEL); + if (!elf_buf) + return -ENOMEM; + memset(elf_buf, 0, tsz); + + read_lock(&vmlist_lock); + for (m=vmlist; m && cursize; m=m->next) { + unsigned long vmstart; + unsigned long vmsize; + unsigned long msize = m->size - PAGE_SIZE; + + if (((unsigned long)m->addr + msize) < + curstart) + continue; + if ((unsigned long)m->addr > (curstart + + cursize)) + break; + vmstart = (curstart < (unsigned long)m->addr ? + (unsigned long)m->addr : curstart); + if (((unsigned long)m->addr + msize) > + (curstart + cursize)) + vmsize = curstart + cursize - vmstart; + else + vmsize = (unsigned long)m->addr + + msize - vmstart; + curstart = vmstart + vmsize; + cursize -= vmsize; + /* don't dump ioremap'd stuff! (TA) */ + if (m->flags & VM_IOREMAP) + continue; + memcpy(elf_buf + (vmstart - start), + (char *)vmstart, vmsize); + } + read_unlock(&vmlist_lock); + if (copy_to_user(buffer, elf_buf, tsz)) { + kfree(elf_buf); + return -EFAULT; + } + kfree(elf_buf); + } else { + if (kern_addr_valid(start)) { + unsigned long n; + + n = copy_to_user(buffer, (char *)start, tsz); + /* + * We cannot distingush between fault on source + * and fault on destination. When this happens + * we clear too and hope it will trigger the + * EFAULT again. + */ + if (n) { + if (clear_user(buffer + tsz - n, + tsz - n)) + return -EFAULT; + } + } else { + if (clear_user(buffer, tsz)) + return -EFAULT; + } + } + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + start += tsz; + tsz = (buflen > PAGE_SIZE ? 
PAGE_SIZE : buflen); + } + + return acc; +} diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c new file mode 100644 index 0000000..10d37bf --- /dev/null +++ b/fs/proc/kmsg.c @@ -0,0 +1,55 @@ +/* + * linux/fs/proc/kmsg.c + * + * Copyright (C) 1992 by Linus Torvalds + * + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/poll.h> +#include <linux/fs.h> + +#include <asm/uaccess.h> +#include <asm/io.h> + +extern wait_queue_head_t log_wait; + +extern int do_syslog(int type, char __user *bug, int count); + +static int kmsg_open(struct inode * inode, struct file * file) +{ + return do_syslog(1,NULL,0); +} + +static int kmsg_release(struct inode * inode, struct file * file) +{ + (void) do_syslog(0,NULL,0); + return 0; +} + +static ssize_t kmsg_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + if ((file->f_flags & O_NONBLOCK) && !do_syslog(9, NULL, 0)) + return -EAGAIN; + return do_syslog(2, buf, count); +} + +static unsigned int kmsg_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &log_wait, wait); + if (do_syslog(9, NULL, 0)) + return POLLIN | POLLRDNORM; + return 0; +} + + +struct file_operations proc_kmsg_operations = { + .read = kmsg_read, + .poll = kmsg_poll, + .open = kmsg_open, + .release = kmsg_release, +}; diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c new file mode 100644 index 0000000..a704103 --- /dev/null +++ b/fs/proc/mmu.c @@ -0,0 +1,67 @@ +/* mmu.c: mmu memory info files + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/hugetlb.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> +#include <asm/div64.h> +#include "internal.h" + +void get_vmalloc_info(struct vmalloc_info *vmi) +{ + struct vm_struct *vma; + unsigned long free_area_size; + unsigned long prev_end; + + vmi->used = 0; + + if (!vmlist) { + vmi->largest_chunk = VMALLOC_TOTAL; + } + else { + vmi->largest_chunk = 0; + + prev_end = VMALLOC_START; + + read_lock(&vmlist_lock); + + for (vma = vmlist; vma; vma = vma->next) { + vmi->used += vma->size; + + free_area_size = (unsigned long) vma->addr - prev_end; + if (vmi->largest_chunk < free_area_size) + vmi->largest_chunk = free_area_size; + + prev_end = vma->size + (unsigned long) vma->addr; + } + + if (VMALLOC_END - prev_end > vmi->largest_chunk) + vmi->largest_chunk = VMALLOC_END - prev_end; + + read_unlock(&vmlist_lock); + } +} diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c new file mode 100644 index 0000000..f3bf016 --- /dev/null +++ b/fs/proc/nommu.c @@ -0,0 +1,135 @@ +/* nommu.c: mmu-less memory info files + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/hugetlb.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> +#include <asm/div64.h> +#include "internal.h" + +/* + * display a list of all the VMAs the kernel knows about + * - nommu kernals have a single flat list + */ +static int nommu_vma_list_show(struct seq_file *m, void *v) +{ + struct vm_area_struct *vma; + unsigned long ino = 0; + struct file *file; + dev_t dev = 0; + int flags, len; + + vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); + + flags = vma->vm_flags; + file = vma->vm_file; + + if (file) { + struct inode *inode = vma->vm_file->f_dentry->d_inode; + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + } + + seq_printf(m, + "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + vma->vm_start, + vma->vm_end, + flags & VM_READ ? 'r' : '-', + flags & VM_WRITE ? 'w' : '-', + flags & VM_EXEC ? 'x' : '-', + flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', + vma->vm_pgoff << PAGE_SHIFT, + MAJOR(dev), MINOR(dev), ino, &len); + + if (file) { + len = 25 + sizeof(void *) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); + seq_path(m, file->f_vfsmnt, file->f_dentry, ""); + } + + seq_putc(m, '\n'); + return 0; +} + +static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) +{ + struct rb_node *_rb; + loff_t pos = *_pos; + void *next = NULL; + + down_read(&nommu_vma_sem); + + for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) { + if (pos == 0) { + next = _rb; + break; + } + } + + return next; +} + +static void nommu_vma_list_stop(struct seq_file *m, void *v) +{ + up_read(&nommu_vma_sem); +} + +static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return rb_next((struct rb_node *) v); +} + +static struct seq_operations proc_nommu_vma_list_seqop = { + .start = nommu_vma_list_start, + .next = nommu_vma_list_next, + .stop = nommu_vma_list_stop, + .show = nommu_vma_list_show +}; + +static int proc_nommu_vma_list_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &proc_nommu_vma_list_seqop); +} + +static struct file_operations proc_nommu_vma_list_operations = { + .open = proc_nommu_vma_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_nommu_init(void) +{ + create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); + return 0; +} + +module_init(proc_nommu_init); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c new file mode 100644 index 0000000..67423c6 --- /dev/null +++ b/fs/proc/proc_devtree.c @@ -0,0 +1,165 @@ +/* + * proc_devtree.c - handles /proc/device-tree + * + * Copyright 1997 Paul Mackerras + */ +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include 
<linux/string.h> +#include <asm/prom.h> +#include <asm/uaccess.h> + +#ifndef HAVE_ARCH_DEVTREE_FIXUPS +static inline void set_node_proc_entry(struct device_node *np, struct proc_dir_entry *de) +{ +} + +static void inline set_node_name_link(struct device_node *np, struct proc_dir_entry *de) +{ +} + +static void inline set_node_addr_link(struct device_node *np, struct proc_dir_entry *de) +{ +} +#endif + +static struct proc_dir_entry *proc_device_tree; + +/* + * Supply data on a read from /proc/device-tree/node/property. + */ +static int property_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct property *pp = data; + int n; + + if (off >= pp->length) { + *eof = 1; + return 0; + } + n = pp->length - off; + if (n > count) + n = count; + else + *eof = 1; + memcpy(page, pp->value + off, n); + *start = page; + return n; +} + +/* + * For a node with a name like "gc@10", we make symlinks called "gc" + * and "@10" to it. + */ + +/* + * Process a node, adding entries for its children and its properties. + */ +void proc_device_tree_add_node(struct device_node *np, struct proc_dir_entry *de) +{ + struct property *pp; + struct proc_dir_entry *ent; + struct device_node *child, *sib; + const char *p, *at; + int l; + struct proc_dir_entry *list, **lastp, *al; + + set_node_proc_entry(np, de); + lastp = &list; + for (pp = np->properties; pp != 0; pp = pp->next) { + /* + * Unfortunately proc_register puts each new entry + * at the beginning of the list. So we rearrange them. + */ + ent = create_proc_read_entry(pp->name, strncmp(pp->name, "security-", 9) ? + S_IRUGO : S_IRUSR, de, property_read_proc, pp); + if (ent == 0) + break; + if (!strncmp(pp->name, "security-", 9)) + ent->size = 0; /* don't leak number of password chars */ + else + ent->size = pp->length; + *lastp = ent; + lastp = &ent->next; + } + child = NULL; + while ((child = of_get_next_child(np, child))) { + p = strrchr(child->full_name, '/'); + if (!p) + p = child->full_name; + else + ++p; + /* chop off '@0' if the name ends with that */ + l = strlen(p); + if (l > 2 && p[l-2] == '@' && p[l-1] == '0') + l -= 2; + ent = proc_mkdir(p, de); + if (ent == 0) + break; + *lastp = ent; + lastp = &ent->next; + proc_device_tree_add_node(child, ent); + + /* + * If we left the address part on the name, consider + * adding symlinks from the name and address parts. + */ + if (p[l] != 0 || (at = strchr(p, '@')) == 0) + continue; + + /* + * If this is the first node with a given name property, + * add a symlink with the name property as its name. + */ + sib = NULL; + while ((sib = of_get_next_child(np, sib)) && sib != child) + if (sib->name && strcmp(sib->name, child->name) == 0) + break; + if (sib == child && strncmp(p, child->name, l) != 0) { + al = proc_symlink(child->name, de, ent->name); + if (al == 0) { + of_node_put(sib); + break; + } + set_node_name_link(child, al); + *lastp = al; + lastp = &al->next; + } + of_node_put(sib); + /* + * Add another directory with the @address part as its name. 
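+ * For a child node named "gc@10" (an illustrative name) the resulting
+ * entries are roughly:
+ *
+ *	gc@10/		the real directory for the node
+ *	gc   -> gc@10	name link, added above for the first such name
+ *	@10  -> gc@10	address link, added here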
+ */ + al = proc_symlink(at, de, ent->name); + if (al == 0) + break; + set_node_addr_link(child, al); + *lastp = al; + lastp = &al->next; + } + of_node_put(child); + *lastp = NULL; + de->subdir = list; +} + +/* + * Called on initialization to set up the /proc/device-tree subtree + */ +void proc_device_tree_init(void) +{ + struct device_node *root; + if ( !have_of ) + return; + proc_device_tree = proc_mkdir("device-tree", NULL); + if (proc_device_tree == 0) + return; + root = of_find_node_by_path("/"); + if (root == 0) { + printk(KERN_ERR "/proc/device-tree: can't find root\n"); + return; + } + proc_device_tree_add_node(root, proc_device_tree); + of_node_put(root); +} diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c new file mode 100644 index 0000000..1d75d6a --- /dev/null +++ b/fs/proc/proc_misc.c @@ -0,0 +1,615 @@ +/* + * linux/fs/proc/proc_misc.c + * + * linux/fs/proc/array.c + * Copyright (C) 1992 by Linus Torvalds + * based on ideas by Darren Senn + * + * This used to be the part of array.c. See the rest of history and credits + * there. I took this into a separate file and switched the thing to generic + * proc_file_inode_operations, leaving in array.c only per-process stuff. + * Inumbers allocation made dynamic (via create_proc_entry()). AV, May 1999. + * + * Changes: + * Fulton Green : Encapsulated position metric calculations. + * <kernel@FultonGreen.com> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/ioport.h> +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/smp_lock.h> +#include <linux/seq_file.h> +#include <linux/times.h> +#include <linux/profile.h> +#include <linux/blkdev.h> +#include <linux/hugetlb.h> +#include <linux/jiffies.h> +#include <linux/sysrq.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/tlb.h> +#include <asm/div64.h> +#include "internal.h" + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) +/* + * Warning: stuff below (imported functions) assumes that its output will fit + * into one page. For some of those functions it may be wrong. Moreover, we + * have a way to deal with that gracefully. Right now I used straightforward + * wrappers, but this needs further analysis wrt potential overflows. 
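+ *
+ * Concretely: each handler below writes into a single page, and
+ * proc_file_read() in generic.c requests at most PROC_BLOCK_SIZE bytes
+ * per call, i.e. PAGE_SIZE - 1024 (3072 bytes with 4K pages, as an
+ * example).  Handlers whose output can outgrow that are better served by
+ * the seq_file interface, as cpuinfo, vmstat and stat below already are.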
+ */ +extern int get_hardware_list(char *); +extern int get_stram_list(char *); +extern int get_chrdev_list(char *); +extern int get_filesystem_list(char *); +extern int get_exec_domain_list(char *); +extern int get_dma_list(char *); +extern int get_locks_status (char *, char **, off_t, int); + +static int proc_calc_metrics(char *page, char **start, off_t off, + int count, int *eof, int len) +{ + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +static int loadavg_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int a, b, c; + int len; + + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + nr_running(), nr_threads, last_pid); + return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int uptime_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct timespec uptime; + struct timespec idle; + int len; + cputime_t idletime = cputime_add(init_task.utime, init_task.stime); + + do_posix_clock_monotonic_gettime(&uptime); + cputime_to_timespec(idletime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", + (unsigned long) uptime.tv_sec, + (uptime.tv_nsec / (NSEC_PER_SEC / 100)), + (unsigned long) idle.tv_sec, + (idle.tv_nsec / (NSEC_PER_SEC / 100))); + + return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int meminfo_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct sysinfo i; + int len; + struct page_state ps; + unsigned long inactive; + unsigned long active; + unsigned long free; + unsigned long committed; + unsigned long allowed; + struct vmalloc_info vmi; + + get_page_state(&ps); + get_zone_counts(&active, &inactive, &free); + +/* + * display in kilobytes. + */ +#define K(x) ((x) << (PAGE_SHIFT - 10)) + si_meminfo(&i); + si_swapinfo(&i); + committed = atomic_read(&vm_committed_space); + allowed = ((totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100) + total_swap_pages; + + get_vmalloc_info(&vmi); + + /* + * Tagged format, for easy grepping and expansion. 
+ */ + len = sprintf(page, + "MemTotal: %8lu kB\n" + "MemFree: %8lu kB\n" + "Buffers: %8lu kB\n" + "Cached: %8lu kB\n" + "SwapCached: %8lu kB\n" + "Active: %8lu kB\n" + "Inactive: %8lu kB\n" + "HighTotal: %8lu kB\n" + "HighFree: %8lu kB\n" + "LowTotal: %8lu kB\n" + "LowFree: %8lu kB\n" + "SwapTotal: %8lu kB\n" + "SwapFree: %8lu kB\n" + "Dirty: %8lu kB\n" + "Writeback: %8lu kB\n" + "Mapped: %8lu kB\n" + "Slab: %8lu kB\n" + "CommitLimit: %8lu kB\n" + "Committed_AS: %8lu kB\n" + "PageTables: %8lu kB\n" + "VmallocTotal: %8lu kB\n" + "VmallocUsed: %8lu kB\n" + "VmallocChunk: %8lu kB\n", + K(i.totalram), + K(i.freeram), + K(i.bufferram), + K(get_page_cache_size()-total_swapcache_pages-i.bufferram), + K(total_swapcache_pages), + K(active), + K(inactive), + K(i.totalhigh), + K(i.freehigh), + K(i.totalram-i.totalhigh), + K(i.freeram-i.freehigh), + K(i.totalswap), + K(i.freeswap), + K(ps.nr_dirty), + K(ps.nr_writeback), + K(ps.nr_mapped), + K(ps.nr_slab), + K(allowed), + K(committed), + K(ps.nr_page_table_pages), + (unsigned long)VMALLOC_TOTAL >> 10, + vmi.used >> 10, + vmi.largest_chunk >> 10 + ); + + len += hugetlb_report_meminfo(page + len); + + return proc_calc_metrics(page, start, off, count, eof, len); +#undef K +} + +extern struct seq_operations fragmentation_op; +static int fragmentation_open(struct inode *inode, struct file *file) +{ + (void)inode; + return seq_open(file, &fragmentation_op); +} + +static struct file_operations fragmentation_file_operations = { + .open = fragmentation_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int version_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + strcpy(page, linux_banner); + len = strlen(page); + return proc_calc_metrics(page, start, off, count, eof, len); +} + +extern struct seq_operations cpuinfo_op; +static int cpuinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cpuinfo_op); +} +static struct file_operations proc_cpuinfo_operations = { + .open = cpuinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +extern struct seq_operations vmstat_op; +static int vmstat_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &vmstat_op); +} +static struct file_operations proc_vmstat_file_operations = { + .open = vmstat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#ifdef CONFIG_PROC_HARDWARE +static int hardware_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = get_hardware_list(page); + return proc_calc_metrics(page, start, off, count, eof, len); +} +#endif + +#ifdef CONFIG_STRAM_PROC +static int stram_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = get_stram_list(page); + return proc_calc_metrics(page, start, off, count, eof, len); +} +#endif + +extern struct seq_operations partitions_op; +static int partitions_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &partitions_op); +} +static struct file_operations proc_partitions_operations = { + .open = partitions_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +extern struct seq_operations diskstats_op; +static int diskstats_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &diskstats_op); +} +static struct file_operations proc_diskstats_operations = { + .open = diskstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release 
= seq_release, +}; + +#ifdef CONFIG_MODULES +extern struct seq_operations modules_op; +static int modules_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &modules_op); +} +static struct file_operations proc_modules_operations = { + .open = modules_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + +extern struct seq_operations slabinfo_op; +extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); +static int slabinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slabinfo_op); +} +static struct file_operations proc_slabinfo_operations = { + .open = slabinfo_open, + .read = seq_read, + .write = slabinfo_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int show_stat(struct seq_file *p, void *v) +{ + int i; + unsigned long jif; + cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; + u64 sum = 0; + + user = nice = system = idle = iowait = + irq = softirq = steal = cputime64_zero; + jif = - wall_to_monotonic.tv_sec; + if (wall_to_monotonic.tv_nsec) + --jif; + + for_each_cpu(i) { + int j; + + user = cputime64_add(user, kstat_cpu(i).cpustat.user); + nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); + system = cputime64_add(system, kstat_cpu(i).cpustat.system); + idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); + iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); + irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); + softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); + steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); + for (j = 0 ; j < NR_IRQS ; j++) + sum += kstat_cpu(i).irqs[j]; + } + + seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n", + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), + (unsigned long long)cputime64_to_clock_t(system), + (unsigned long long)cputime64_to_clock_t(idle), + (unsigned long long)cputime64_to_clock_t(iowait), + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal)); + for_each_online_cpu(i) { + + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ + user = kstat_cpu(i).cpustat.user; + nice = kstat_cpu(i).cpustat.nice; + system = kstat_cpu(i).cpustat.system; + idle = kstat_cpu(i).cpustat.idle; + iowait = kstat_cpu(i).cpustat.iowait; + irq = kstat_cpu(i).cpustat.irq; + softirq = kstat_cpu(i).cpustat.softirq; + steal = kstat_cpu(i).cpustat.steal; + seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n", + i, + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), + (unsigned long long)cputime64_to_clock_t(system), + (unsigned long long)cputime64_to_clock_t(idle), + (unsigned long long)cputime64_to_clock_t(iowait), + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal)); + } + seq_printf(p, "intr %llu", (unsigned long long)sum); + +#if !defined(CONFIG_PPC64) && !defined(CONFIG_ALPHA) + for (i = 0; i < NR_IRQS; i++) + seq_printf(p, " %u", kstat_irqs(i)); +#endif + + seq_printf(p, + "\nctxt %llu\n" + "btime %lu\n" + "processes %lu\n" + "procs_running %lu\n" + "procs_blocked %lu\n", + nr_context_switches(), + (unsigned long)jif, + total_forks, + nr_running(), + nr_iowait()); + + return 0; +} + +static int stat_open(struct inode *inode, struct file *file) +{ + unsigned size 
= 4096 * (1 + num_possible_cpus() / 32); + char *buf; + struct seq_file *m; + int res; + + /* don't ask for more than the kmalloc() max size, currently 128 KB */ + if (size > 128 * 1024) + size = 128 * 1024; + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + res = single_open(file, show_stat, NULL); + if (!res) { + m = file->private_data; + m->buf = buf; + m->size = size; + } else + kfree(buf); + return res; +} +static struct file_operations proc_stat_operations = { + .open = stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int devices_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = get_chrdev_list(page); + len += get_blkdev_list(page+len); + return proc_calc_metrics(page, start, off, count, eof, len); +} + +/* + * /proc/interrupts + */ +static void *int_seq_start(struct seq_file *f, loff_t *pos) +{ + return (*pos <= NR_IRQS) ? pos : NULL; +} + +static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) +{ + (*pos)++; + if (*pos > NR_IRQS) + return NULL; + return pos; +} + +static void int_seq_stop(struct seq_file *f, void *v) +{ + /* Nothing to do */ +} + + +extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */ +static struct seq_operations int_seq_ops = { + .start = int_seq_start, + .next = int_seq_next, + .stop = int_seq_stop, + .show = show_interrupts +}; + +static int interrupts_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &int_seq_ops); +} + +static struct file_operations proc_interrupts_operations = { + .open = interrupts_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int filesystems_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = get_filesystem_list(page); + return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int cmdline_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%s\n", saved_command_line); + return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int locks_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = get_locks_status(page, start, off, count); + + if (len < count) + *eof = 1; + return len; +} + +static int execdomains_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = get_exec_domain_list(page); + return proc_calc_metrics(page, start, off, count, eof, len); +} + +#ifdef CONFIG_MAGIC_SYSRQ +/* + * writing 'C' to /proc/sysrq-trigger is like sysrq-C + */ +static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (count) { + char c; + + if (get_user(c, buf)) + return -EFAULT; + __handle_sysrq(c, NULL, NULL, 0); + } + return count; +} + +static struct file_operations proc_sysrq_trigger_operations = { + .write = write_sysrq_trigger, +}; +#endif + +struct proc_dir_entry *proc_root_kcore; + +void create_seq_entry(char *name, mode_t mode, struct file_operations *f) +{ + struct proc_dir_entry *entry; + entry = create_proc_entry(name, mode, NULL); + if (entry) + entry->proc_fops = f; +} + +void __init proc_misc_init(void) +{ + struct proc_dir_entry *entry; + static struct { + char *name; + int (*read_proc)(char*,char**,off_t,int,int*,void*); + } *p, simple_ones[] = { + {"loadavg", loadavg_read_proc}, + {"uptime", uptime_read_proc}, + {"meminfo", meminfo_read_proc}, 
+ {"version", version_read_proc}, +#ifdef CONFIG_PROC_HARDWARE + {"hardware", hardware_read_proc}, +#endif +#ifdef CONFIG_STRAM_PROC + {"stram", stram_read_proc}, +#endif + {"devices", devices_read_proc}, + {"filesystems", filesystems_read_proc}, + {"cmdline", cmdline_read_proc}, + {"locks", locks_read_proc}, + {"execdomains", execdomains_read_proc}, + {NULL,} + }; + for (p = simple_ones; p->name; p++) + create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL); + + proc_symlink("mounts", NULL, "self/mounts"); + + /* And now for trickier ones */ + entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); + if (entry) + entry->proc_fops = &proc_kmsg_operations; + create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); + create_seq_entry("partitions", 0, &proc_partitions_operations); + create_seq_entry("stat", 0, &proc_stat_operations); + create_seq_entry("interrupts", 0, &proc_interrupts_operations); + create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); + create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); + create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); + create_seq_entry("diskstats", 0, &proc_diskstats_operations); +#ifdef CONFIG_MODULES + create_seq_entry("modules", 0, &proc_modules_operations); +#endif +#ifdef CONFIG_SCHEDSTATS + create_seq_entry("schedstat", 0, &proc_schedstat_operations); +#endif +#ifdef CONFIG_PROC_KCORE + proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); + if (proc_root_kcore) { + proc_root_kcore->proc_fops = &proc_kcore_operations; + proc_root_kcore->size = + (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; + } +#endif +#ifdef CONFIG_MAGIC_SYSRQ + entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); + if (entry) + entry->proc_fops = &proc_sysrq_trigger_operations; +#endif +#ifdef CONFIG_PPC32 + { + extern struct file_operations ppc_htab_operations; + entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL); + if (entry) + entry->proc_fops = &ppc_htab_operations; + } +#endif +} diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c new file mode 100644 index 0000000..15c4455 --- /dev/null +++ b/fs/proc/proc_tty.c @@ -0,0 +1,242 @@ +/* + * proc_tty.c -- handles /proc/tty + * + * Copyright 1997, Theodore Ts'o + */ + +#include <asm/uaccess.h> + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/tty.h> +#include <linux/seq_file.h> +#include <linux/bitops.h> + +static int tty_ldiscs_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data); + +/* + * The /proc/tty directory inodes... + */ +static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver; + +/* + * This is the handler for /proc/tty/drivers + */ +static void show_tty_range(struct seq_file *m, struct tty_driver *p, + dev_t from, int num) +{ + seq_printf(m, "%-20s ", p->driver_name ? 
p->driver_name : "unknown"); + seq_printf(m, "/dev/%-8s ", p->name); + if (p->num > 1) { + seq_printf(m, "%3d %d-%d ", MAJOR(from), MINOR(from), + MINOR(from) + num - 1); + } else { + seq_printf(m, "%3d %7d ", MAJOR(from), MINOR(from)); + } + switch (p->type) { + case TTY_DRIVER_TYPE_SYSTEM: + seq_printf(m, "system"); + if (p->subtype == SYSTEM_TYPE_TTY) + seq_printf(m, ":/dev/tty"); + else if (p->subtype == SYSTEM_TYPE_SYSCONS) + seq_printf(m, ":console"); + else if (p->subtype == SYSTEM_TYPE_CONSOLE) + seq_printf(m, ":vtmaster"); + break; + case TTY_DRIVER_TYPE_CONSOLE: + seq_printf(m, "console"); + break; + case TTY_DRIVER_TYPE_SERIAL: + seq_printf(m, "serial"); + break; + case TTY_DRIVER_TYPE_PTY: + if (p->subtype == PTY_TYPE_MASTER) + seq_printf(m, "pty:master"); + else if (p->subtype == PTY_TYPE_SLAVE) + seq_printf(m, "pty:slave"); + else + seq_printf(m, "pty"); + break; + default: + seq_printf(m, "type:%d.%d", p->type, p->subtype); + } + seq_putc(m, '\n'); +} + +static int show_tty_driver(struct seq_file *m, void *v) +{ + struct tty_driver *p = v; + dev_t from = MKDEV(p->major, p->minor_start); + dev_t to = from + p->num; + + if (&p->tty_drivers == tty_drivers.next) { + /* pseudo-drivers first */ + seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty"); + seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0); + seq_printf(m, "system:/dev/tty\n"); + seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console"); + seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1); + seq_printf(m, "system:console\n"); +#ifdef CONFIG_UNIX98_PTYS + seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx"); + seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2); + seq_printf(m, "system\n"); +#endif +#ifdef CONFIG_VT + seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0"); + seq_printf(m, "%3d %7d ", TTY_MAJOR, 0); + seq_printf(m, "system:vtmaster\n"); +#endif + } + + while (MAJOR(from) < MAJOR(to)) { + dev_t next = MKDEV(MAJOR(from)+1, 0); + show_tty_range(m, p, from, next - from); + from = next; + } + if (from != to) + show_tty_range(m, p, from, to - from); + return 0; +} + +/* iterator */ +static void *t_start(struct seq_file *m, loff_t *pos) +{ + struct list_head *p; + loff_t l = *pos; + list_for_each(p, &tty_drivers) + if (!l--) + return list_entry(p, struct tty_driver, tty_drivers); + return NULL; +} + +static void *t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next; + (*pos)++; + return p==&tty_drivers ? NULL : + list_entry(p, struct tty_driver, tty_drivers); +} + +static void t_stop(struct seq_file *m, void *v) +{ +} + +static struct seq_operations tty_drivers_op = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = show_tty_driver +}; + +static int tty_drivers_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &tty_drivers_op); +} + +static struct file_operations proc_tty_drivers_operations = { + .open = tty_drivers_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * This is the handler for /proc/tty/ldiscs + */ +static int tty_ldiscs_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int i; + int len = 0; + off_t begin = 0; + struct tty_ldisc *ld; + + for (i=0; i < NR_LDISCS; i++) { + ld = tty_ldisc_get(i); + if (ld == NULL) + continue; + len += sprintf(page+len, "%-10s %2d\n", + ld->name ? 
ld->name : "???", i); + tty_ldisc_put(i); + if (len+begin > off+count) + break; + if (len+begin < off) { + begin += len; + len = 0; + } + } + if (i >= NR_LDISCS) + *eof = 1; + if (off >= len+begin) + return 0; + *start = page + (off-begin); + return ((count < begin+len-off) ? count : begin+len-off); +} + +/* + * This function is called by tty_register_driver() to handle + * registering the driver's /proc handler into /proc/tty/driver/<foo> + */ +void proc_tty_register_driver(struct tty_driver *driver) +{ + struct proc_dir_entry *ent; + + if ((!driver->read_proc && !driver->write_proc) || + !driver->driver_name || + driver->proc_entry) + return; + + ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); + if (!ent) + return; + ent->read_proc = driver->read_proc; + ent->write_proc = driver->write_proc; + ent->owner = driver->owner; + ent->data = driver; + + driver->proc_entry = ent; +} + +/* + * This function is called by tty_unregister_driver() + */ +void proc_tty_unregister_driver(struct tty_driver *driver) +{ + struct proc_dir_entry *ent; + + ent = driver->proc_entry; + if (!ent) + return; + + remove_proc_entry(driver->driver_name, proc_tty_driver); + + driver->proc_entry = NULL; +} + +/* + * Called by proc_root_init() to initialize the /proc/tty subtree + */ +void __init proc_tty_init(void) +{ + struct proc_dir_entry *entry; + if (!proc_mkdir("tty", NULL)) + return; + proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); + /* + * /proc/tty/driver/serial reveals the exact character counts for + * serial links which is just too easy to abuse for inferring + * password lengths and inter-keystroke timings during password + * entry. + */ + proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); + + create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); + entry = create_proc_entry("tty/drivers", 0, NULL); + if (entry) + entry->proc_fops = &proc_tty_drivers_operations; +} diff --git a/fs/proc/root.c b/fs/proc/root.c new file mode 100644 index 0000000..aef148f --- /dev/null +++ b/fs/proc/root.c @@ -0,0 +1,161 @@ +/* + * linux/fs/proc/root.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * proc root directory handling functions + */ + +#include <asm/uaccess.h> + +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/smp_lock.h> + +struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; + +#ifdef CONFIG_SYSCTL +struct proc_dir_entry *proc_sys_root; +#endif + +static struct super_block *proc_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, proc_fill_super); +} + +static struct file_system_type proc_fs_type = { + .name = "proc", + .get_sb = proc_get_sb, + .kill_sb = kill_anon_super, +}; + +extern int __init proc_init_inodecache(void); +void __init proc_root_init(void) +{ + int err = proc_init_inodecache(); + if (err) + return; + err = register_filesystem(&proc_fs_type); + if (err) + return; + proc_mnt = kern_mount(&proc_fs_type); + err = PTR_ERR(proc_mnt); + if (IS_ERR(proc_mnt)) { + unregister_filesystem(&proc_fs_type); + return; + } + proc_misc_init(); + proc_net = proc_mkdir("net", NULL); + proc_net_stat = proc_mkdir("net/stat", NULL); + +#ifdef CONFIG_SYSVIPC + proc_mkdir("sysvipc", NULL); +#endif +#ifdef CONFIG_SYSCTL + proc_sys_root = proc_mkdir("sys", 
NULL); +#endif +#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) + proc_mkdir("sys/fs", NULL); + proc_mkdir("sys/fs/binfmt_misc", NULL); +#endif + proc_root_fs = proc_mkdir("fs", NULL); + proc_root_driver = proc_mkdir("driver", NULL); + proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ +#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) + /* just give it a mountpoint */ + proc_mkdir("openprom", NULL); +#endif + proc_tty_init(); +#ifdef CONFIG_PROC_DEVICETREE + proc_device_tree_init(); +#endif + proc_bus = proc_mkdir("bus", NULL); +} + +static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +{ + /* + * nr_threads is actually protected by the tasklist_lock; + * however, it's conventional to do reads, especially for + * reporting, without any locking whatsoever. + */ + if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */ + dir->i_nlink = proc_root.nlink + nr_threads; + + if (!proc_lookup(dir, dentry, nd)) { + return NULL; + } + + return proc_pid_lookup(dir, dentry, nd); +} + +static int proc_root_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int nr = filp->f_pos; + int ret; + + lock_kernel(); + + if (nr < FIRST_PROCESS_ENTRY) { + int error = proc_readdir(filp, dirent, filldir); + if (error <= 0) { + unlock_kernel(); + return error; + } + filp->f_pos = FIRST_PROCESS_ENTRY; + } + unlock_kernel(); + + ret = proc_pid_readdir(filp, dirent, filldir); + return ret; +} + +/* + * The root /proc directory is special, as it has the + * <pid> directories. Thus we don't use the generic + * directory handling functions for that.. + */ +static struct file_operations proc_root_operations = { + .read = generic_read_dir, + .readdir = proc_root_readdir, +}; + +/* + * proc root can do almost nothing.. + */ +static struct inode_operations proc_root_inode_operations = { + .lookup = proc_root_lookup, +}; + +/* + * This is the root "inode" in the /proc tree.. 
+ */ +struct proc_dir_entry proc_root = { + .low_ino = PROC_ROOT_INO, + .namelen = 5, + .name = "/proc", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .nlink = 2, + .proc_iops = &proc_root_inode_operations, + .proc_fops = &proc_root_operations, + .parent = &proc_root, +}; + +EXPORT_SYMBOL(proc_symlink); +EXPORT_SYMBOL(proc_mkdir); +EXPORT_SYMBOL(create_proc_entry); +EXPORT_SYMBOL(remove_proc_entry); +EXPORT_SYMBOL(proc_root); +EXPORT_SYMBOL(proc_root_fs); +EXPORT_SYMBOL(proc_net); +EXPORT_SYMBOL(proc_net_stat); +EXPORT_SYMBOL(proc_bus); +EXPORT_SYMBOL(proc_root_driver); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c new file mode 100644 index 0000000..28b4a02 --- /dev/null +++ b/fs/proc/task_mmu.c @@ -0,0 +1,235 @@ +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/mount.h> +#include <linux/seq_file.h> +#include <asm/elf.h> +#include <asm/uaccess.h> +#include "internal.h" + +char *task_mem(struct mm_struct *mm, char *buffer) +{ + unsigned long data, text, lib; + + data = mm->total_vm - mm->shared_vm - mm->stack_vm; + text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; + lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; + buffer += sprintf(buffer, + "VmSize:\t%8lu kB\n" + "VmLck:\t%8lu kB\n" + "VmRSS:\t%8lu kB\n" + "VmData:\t%8lu kB\n" + "VmStk:\t%8lu kB\n" + "VmExe:\t%8lu kB\n" + "VmLib:\t%8lu kB\n" + "VmPTE:\t%8lu kB\n", + (mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), + mm->locked_vm << (PAGE_SHIFT-10), + get_mm_counter(mm, rss) << (PAGE_SHIFT-10), + data << (PAGE_SHIFT-10), + mm->stack_vm << (PAGE_SHIFT-10), text, lib, + (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); + return buffer; +} + +unsigned long task_vsize(struct mm_struct *mm) +{ + return PAGE_SIZE * mm->total_vm; +} + +int task_statm(struct mm_struct *mm, int *shared, int *text, + int *data, int *resident) +{ + int rss = get_mm_counter(mm, rss); + + *shared = rss - get_mm_counter(mm, anon_rss); + *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) + >> PAGE_SHIFT; + *data = mm->total_vm - mm->shared_vm; + *resident = rss; + return mm->total_vm; +} + +int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct vm_area_struct * vma; + int result = -ENOENT; + struct task_struct *task = proc_task(inode); + struct mm_struct * mm = get_task_mm(task); + + if (!mm) + goto out; + down_read(&mm->mmap_sem); + + vma = mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) + break; + vma = vma->vm_next; + } + + if (vma) { + *mnt = mntget(vma->vm_file->f_vfsmnt); + *dentry = dget(vma->vm_file->f_dentry); + result = 0; + } + + up_read(&mm->mmap_sem); + mmput(mm); +out: + return result; +} + +static void pad_len_spaces(struct seq_file *m, int len) +{ + len = 25 + sizeof(void*) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); +} + +static int show_map(struct seq_file *m, void *v) +{ + struct task_struct *task = m->private; + struct vm_area_struct *map = v; + struct mm_struct *mm = map->vm_mm; + struct file *file = map->vm_file; + int flags = map->vm_flags; + unsigned long ino = 0; + dev_t dev = 0; + int len; + + if (file) { + struct inode *inode = map->vm_file->f_dentry->d_inode; + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + } + + seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + map->vm_start, + map->vm_end, + flags & VM_READ ? 'r' : '-', + flags & VM_WRITE ? 'w' : '-', + flags & VM_EXEC ? 'x' : '-', + flags & VM_MAYSHARE ? 
's' : 'p', + map->vm_pgoff << PAGE_SHIFT, + MAJOR(dev), MINOR(dev), ino, &len); + + /* + * Print the dentry name for named mappings, and a + * special [heap] marker for the heap: + */ + if (map->vm_file) { + pad_len_spaces(m, len); + seq_path(m, file->f_vfsmnt, file->f_dentry, ""); + } else { + if (mm) { + if (map->vm_start <= mm->start_brk && + map->vm_end >= mm->brk) { + pad_len_spaces(m, len); + seq_puts(m, "[heap]"); + } else { + if (map->vm_start <= mm->start_stack && + map->vm_end >= mm->start_stack) { + + pad_len_spaces(m, len); + seq_puts(m, "[stack]"); + } + } + } else { + pad_len_spaces(m, len); + seq_puts(m, "[vdso]"); + } + } + seq_putc(m, '\n'); + if (m->count < m->size) /* map is copied successfully */ + m->version = (map != get_gate_vma(task))? map->vm_start: 0; + return 0; +} + +static void *m_start(struct seq_file *m, loff_t *pos) +{ + struct task_struct *task = m->private; + unsigned long last_addr = m->version; + struct mm_struct *mm; + struct vm_area_struct *map, *tail_map; + loff_t l = *pos; + + /* + * We remember last_addr rather than next_addr to hit with + * mmap_cache most of the time. We have zero last_addr at + * the begining and also after lseek. We will have -1 last_addr + * after the end of the maps. + */ + + if (last_addr == -1UL) + return NULL; + + mm = get_task_mm(task); + if (!mm) + return NULL; + + tail_map = get_gate_vma(task); + down_read(&mm->mmap_sem); + + /* Start with last addr hint */ + if (last_addr && (map = find_vma(mm, last_addr))) { + map = map->vm_next; + goto out; + } + + /* + * Check the map index is within the range and do + * sequential scan until m_index. + */ + map = NULL; + if ((unsigned long)l < mm->map_count) { + map = mm->mmap; + while (l-- && map) + map = map->vm_next; + goto out; + } + + if (l != mm->map_count) + tail_map = NULL; /* After gate map */ + +out: + if (map) + return map; + + /* End of maps has reached */ + m->version = (tail_map != NULL)? 0: -1UL; + up_read(&mm->mmap_sem); + mmput(mm); + return tail_map; +} + +static void m_stop(struct seq_file *m, void *v) +{ + struct task_struct *task = m->private; + struct vm_area_struct *map = v; + if (map && map != get_gate_vma(task)) { + struct mm_struct *mm = map->vm_mm; + up_read(&mm->mmap_sem); + mmput(mm); + } +} + +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct task_struct *task = m->private; + struct vm_area_struct *map = v; + struct vm_area_struct *tail_map = get_gate_vma(task); + + (*pos)++; + if (map && (map != tail_map) && map->vm_next) + return map->vm_next; + m_stop(m, v); + return (map != tail_map)? tail_map: NULL; +} + +struct seq_operations proc_pid_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_map +}; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c new file mode 100644 index 0000000..8f68827 --- /dev/null +++ b/fs/proc/task_nommu.c @@ -0,0 +1,164 @@ + +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/seq_file.h> +#include "internal.h" + +/* + * Logic: we've got two memory sums for each process, "shared", and + * "non-shared". Shared memory may get counted more then once, for + * each process that owns it. Non-shared memory is counted + * accurately. 
+ */ +char *task_mem(struct mm_struct *mm, char *buffer) +{ + struct vm_list_struct *vml; + unsigned long bytes = 0, sbytes = 0, slack = 0; + + down_read(&mm->mmap_sem); + for (vml = mm->context.vmlist; vml; vml = vml->next) { + if (!vml->vma) + continue; + + bytes += kobjsize(vml); + if (atomic_read(&mm->mm_count) > 1 || + atomic_read(&vml->vma->vm_usage) > 1 + ) { + sbytes += kobjsize((void *) vml->vma->vm_start); + sbytes += kobjsize(vml->vma); + } else { + bytes += kobjsize((void *) vml->vma->vm_start); + bytes += kobjsize(vml->vma); + slack += kobjsize((void *) vml->vma->vm_start) - + (vml->vma->vm_end - vml->vma->vm_start); + } + } + + if (atomic_read(&mm->mm_count) > 1) + sbytes += kobjsize(mm); + else + bytes += kobjsize(mm); + + if (current->fs && atomic_read(¤t->fs->count) > 1) + sbytes += kobjsize(current->fs); + else + bytes += kobjsize(current->fs); + + if (current->files && atomic_read(¤t->files->count) > 1) + sbytes += kobjsize(current->files); + else + bytes += kobjsize(current->files); + + if (current->sighand && atomic_read(¤t->sighand->count) > 1) + sbytes += kobjsize(current->sighand); + else + bytes += kobjsize(current->sighand); + + bytes += kobjsize(current); /* includes kernel stack */ + + buffer += sprintf(buffer, + "Mem:\t%8lu bytes\n" + "Slack:\t%8lu bytes\n" + "Shared:\t%8lu bytes\n", + bytes, slack, sbytes); + + up_read(&mm->mmap_sem); + return buffer; +} + +unsigned long task_vsize(struct mm_struct *mm) +{ + struct vm_list_struct *tbp; + unsigned long vsize = 0; + + down_read(&mm->mmap_sem); + for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { + if (tbp->vma) + vsize += kobjsize((void *) tbp->vma->vm_start); + } + up_read(&mm->mmap_sem); + return vsize; +} + +int task_statm(struct mm_struct *mm, int *shared, int *text, + int *data, int *resident) +{ + struct vm_list_struct *tbp; + int size = kobjsize(mm); + + down_read(&mm->mmap_sem); + for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { + size += kobjsize(tbp); + if (tbp->vma) { + size += kobjsize(tbp->vma); + size += kobjsize((void *) tbp->vma->vm_start); + } + } + + size += (*text = mm->end_code - mm->start_code); + size += (*data = mm->start_stack - mm->start_data); + up_read(&mm->mmap_sem); + *resident = size; + return size; +} + +int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct vm_list_struct *vml; + struct vm_area_struct *vma; + struct task_struct *task = proc_task(inode); + struct mm_struct *mm = get_task_mm(task); + int result = -ENOENT; + + if (!mm) + goto out; + down_read(&mm->mmap_sem); + + vml = mm->context.vmlist; + vma = NULL; + while (vml) { + if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) { + vma = vml->vma; + break; + } + vml = vml->next; + } + + if (vma) { + *mnt = mntget(vma->vm_file->f_vfsmnt); + *dentry = dget(vma->vm_file->f_dentry); + result = 0; + } + + up_read(&mm->mmap_sem); + mmput(mm); +out: + return result; +} + +/* + * Albert D. Cahalan suggested to fake entries for the traditional + * sections here. This might be worth investigating. + */ +static int show_map(struct seq_file *m, void *v) +{ + return 0; +} +static void *m_start(struct seq_file *m, loff_t *pos) +{ + return NULL; +} +static void m_stop(struct seq_file *m, void *v) +{ +} +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ + return NULL; +} +struct seq_operations proc_pid_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_map +}; |
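Editor's note, not part of the patch above: the legacy read_proc interface that most of the simple entries in this diff rely on (loadavg, cmdline, execdomains, the tty ldiscs handler, and the simple_ones[] table in proc_misc_init()) is easy to misread, so the following is a minimal, self-contained sketch of how an out-of-tree module of the same 2.6-era API would publish one read-only file through it. Everything specific here is invented for illustration: the entry name "example", the message text, and the function names are hypothetical, and the length/offset accounting simply inlines the same arithmetic that proc_calc_metrics() performs for the in-tree handlers.

	/*
	 * Illustrative sketch only -- assumes the 2.6-era procfs API used
	 * throughout the diff above (create_proc_read_entry, read_proc_t,
	 * remove_proc_entry).  Names below are made up for the example.
	 */
	#include <linux/module.h>
	#include <linux/proc_fs.h>

	static int example_read_proc(char *page, char **start, off_t off,
				     int count, int *eof, void *data)
	{
		/* Format the whole (single-page) output into "page". */
		int len = sprintf(page, "hello from /proc/example\n");

		/* Same accounting proc_calc_metrics() does for the
		 * handlers above: report EOF once the caller's window
		 * reaches the end, then return the usable slice. */
		if (len <= off + count)
			*eof = 1;
		*start = page + off;
		len -= off;
		if (len > count)
			len = count;
		if (len < 0)
			len = 0;
		return len;
	}

	static int __init example_init(void)
	{
		/* Mirrors the simple_ones[] registration loop in
		 * proc_misc_init(): one call creates /proc/example. */
		if (!create_proc_read_entry("example", 0, NULL,
					    example_read_proc, NULL))
			return -ENOMEM;
		return 0;
	}

	static void __exit example_exit(void)
	{
		remove_proc_entry("example", NULL);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");

With this loaded, "cat /proc/example" would return the single line formatted above; outputs larger than one page need the *start/off handling shown, which is exactly why the kernel's own simple handlers funnel their lengths through proc_calc_metrics() instead of returning the raw sprintf() count.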