diff options
Diffstat (limited to 'arch/um/kernel')
26 files changed, 633 insertions, 990 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index a8918e80..614b8eb 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -8,25 +8,24 @@ clean-files := obj-y = config.o exec_kern.o exitcode.o \ helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \ - physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ - sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ - syscall_kern.o sysrq.o tempfile.o time.o time_kern.o \ - tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \ - user_util.o + physmem.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ + sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o \ + tempfile.o time.o time_kern.o tlb.o trap_kern.o trap_user.o \ + uaccess_user.o um_arch.o umid.o user_util.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_GCOV) += gmon_syms.o obj-$(CONFIG_TTY_LOG) += tty_log.o -obj-$(CONFIG_SYSCALL_DEBUG) += syscall_user.o +obj-$(CONFIG_SYSCALL_DEBUG) += syscall.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ user-objs-$(CONFIG_TTY_LOG) += tty_log.o -USER_OBJS := $(user-objs-y) config.o helper.o main.o process.o tempfile.o \ - time.o tty_log.o umid.o user_util.o +USER_OBJS := $(user-objs-y) config.o helper.o main.o tempfile.o time.o \ + tty_log.o umid.o user_util.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 9f18061..dcd8149 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -31,7 +31,7 @@ #include "kern_util.h" #include "irq_user.h" #include "irq_kern.h" - +#include "os.h" /* * Generic, controller-independent functions: @@ -168,13 +168,32 @@ void __init init_IRQ(void) } } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *, + struct pt_regs *)) +{ + int fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err){ + printk("init_aio_irq - os_pipe failed, err = %d\n", -err); + goto out; + } + + err = um_request_irq(irq, fds[0], IRQ_READ, handler, + SA_INTERRUPT | SA_SAMPLE_RANDOM, name, + (void *) (long) fds[0]); + if(err){ + printk("init_aio_irq - : um_request_irq failed, err = %d\n", + err); + goto out_close; + } + + err = fds[1]; + goto out; + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + out: + return(err); +} diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 99439fa..32d3076 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -114,22 +114,3 @@ extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__read_lock_failed); #endif - -#ifdef CONFIG_HIGHMEM -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c index 1e1a87f..d31027f 100644 --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -97,7 +97,7 @@ int main(int argc, char **argv, char **envp) exit(1); } -#ifdef UML_CONFIG_MODE_TT +#ifdef UML_CONFIG_CMDLINE_ON_HOST /* Allocate memory for thread command lines */ if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 5597bd3..64fa062 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -196,7 +196,7 @@ static void init_highmem(void) static void __init fixaddr_user_init( void) { -#if CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA long size = FIXADDR_USER_END - FIXADDR_USER_START; pgd_t *pgd; pud_t *pud; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c deleted file mode 100644 index 67acd92..0000000 --- a/arch/um/kernel/process.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdio.h> -#include <unistd.h> -#include <signal.h> -#include <sched.h> -#include <errno.h> -#include <stdarg.h> -#include <stdlib.h> -#include <setjmp.h> -#include <sys/time.h> -#include <sys/wait.h> -#include <sys/mman.h> -#include <asm/unistd.h> -#include <asm/page.h> -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "process.h" -#include "signal_kern.h" -#include "signal_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "irq_user.h" -#include "ptrace_user.h" -#include "time_user.h" -#include "init.h" -#include "os.h" -#include "uml-config.h" -#include "choose-mode.h" -#include "mode.h" -#include "tempfile.h" -#ifdef UML_CONFIG_MODE_SKAS -#include "skas.h" -#include "skas_ptrace.h" -#include "registers.h" -#endif - -void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) -{ - int flags = 0, pages; - - if(sig_stack != NULL){ - pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER); - set_sigstack(sig_stack, pages * page_size()); - flags = SA_ONSTACK; - } - if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); -} - -void init_new_thread_signals(int altstack) -{ - int flags = altstack ? SA_ONSTACK : 0; - - set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGILL, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGUSR2, (__sighandler_t) sig_handler, - flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - signal(SIGHUP, SIG_IGN); - - init_irq_signals(altstack); -} - -struct tramp { - int (*tramp)(void *); - void *tramp_data; - unsigned long temp_stack; - int flags; - int pid; -}; - -/* See above for why sigkill is here */ - -int sigkill = SIGKILL; - -int outer_tramp(void *arg) -{ - struct tramp *t; - int sig = sigkill; - - t = arg; - t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, - t->flags, t->tramp_data); - if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); - kill(os_getpid(), sig); - _exit(0); -} - -int start_fork_tramp(void *thread_arg, unsigned long temp_stack, - int clone_flags, int (*tramp)(void *)) -{ - struct tramp arg; - unsigned long sp; - int new_pid, status, err; - - /* The trampoline will run on the temporary stack */ - sp = stack_sp(temp_stack); - - clone_flags |= CLONE_FILES | SIGCHLD; - - arg.tramp = tramp; - arg.tramp_data = thread_arg; - arg.temp_stack = temp_stack; - arg.flags = clone_flags; - - /* Start the process and wait for it to kill itself */ - new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); - if(new_pid < 0) - return(new_pid); - - CATCH_EINTR(err = waitpid(new_pid, &status, 0)); - if(err < 0) - panic("Waiting for outer trampoline failed - errno = %d", - errno); - - if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) - panic("outer trampoline didn't exit with SIGKILL, " - "status = %d", status); - - return(arg.pid); -} - -static int ptrace_child(void *arg) -{ - int ret; - int pid = os_getpid(), ppid = getppid(); - int sc_result; - - if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ - perror("ptrace"); - os_kill_process(pid, 0); - } - os_stop_process(pid); - - /*This syscall will be intercepted by the parent. Don't call more than - * once, please.*/ - sc_result = os_getpid(); - - if (sc_result == pid) - ret = 1; /*Nothing modified by the parent, we are running - normally.*/ - else if (sc_result == ppid) - ret = 0; /*Expected in check_ptrace and check_sysemu when they - succeed in modifying the stack frame*/ - else - ret = 2; /*Serious trouble! This could be caused by a bug in - host 2.6 SKAS3/2.6 patch before release -V6, together - with a bug in the UML code itself.*/ - _exit(ret); -} - -static int start_ptraced_child(void **stack_out) -{ - void *stack; - unsigned long sp; - int pid, n, status; - - stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if(stack == MAP_FAILED) - panic("check_ptrace : mmap failed, errno = %d", errno); - sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); - if(pid < 0) - panic("check_ptrace : clone failed, errno = %d", errno); - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) - panic("check_ptrace : expected SIGSTOP, got status = %d", - status); - - *stack_out = stack; - return(pid); -} - -/* When testing for SYSEMU support, if it is one of the broken versions, we must - * just avoid using sysemu, not panic, but only if SYSEMU features are broken. - * So only for SYSEMU features we test mustpanic, while normal host features - * must work anyway!*/ -static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic) -{ - int status, n, ret = 0; - - if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", errno); - CATCH_EINTR(n = waitpid(pid, &status, 0)); - if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { - int exit_with = WEXITSTATUS(status); - if (exit_with == 2) - printk("check_ptrace : child exited with status 2. " - "Serious trouble happening! Try updating your " - "host skas patch!\nDisabling SYSEMU support."); - printk("check_ptrace : child exited with exitcode %d, while " - "expecting %d; status 0x%x", exit_with, - exitcode, status); - if (mustpanic) - panic("\n"); - else - printk("\n"); - ret = -1; - } - - if(munmap(stack, PAGE_SIZE) < 0) - panic("check_ptrace : munmap failed, errno = %d", errno); - return ret; -} - -static int force_sysemu_disabled = 0; - -int ptrace_faultinfo = 1; -int proc_mm = 1; - -static int __init skas0_cmd_param(char *str, int* add) -{ - ptrace_faultinfo = proc_mm = 0; - return 0; -} - -static int __init nosysemu_cmd_param(char *str, int* add) -{ - force_sysemu_disabled = 1; - return 0; -} - -__uml_setup("skas0", skas0_cmd_param, - "skas0\n" - " Disables SKAS3 usage, so that SKAS0 is used, unless you \n" - " specify mode=tt.\n\n"); - -__uml_setup("nosysemu", nosysemu_cmd_param, - "nosysemu\n" - " Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" - " SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n" - " behaviour of ptrace() and helps reducing host context switch rate.\n" - " To make it working, you need a kernel patch for your host, too.\n" - " See http://perso.wanadoo.fr/laurent.vivier/UML/ for further information.\n\n"); - -static void __init check_sysemu(void) -{ - void *stack; - int pid, syscall, n, status, count=0; - - printk("Checking syscall emulation patch for ptrace..."); - sysemu_supported = 0; - pid = start_ptraced_child(&stack); - - if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) - goto fail; - - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if (n < 0) - panic("check_sysemu : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_sysemu : expected SIGTRAP, " - "got status = %d", status); - - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, - os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); - - if (stop_ptraced_child(pid, stack, 0, 0) < 0) - goto fail_stopped; - - sysemu_supported = 1; - printk("OK\n"); - set_using_sysemu(!force_sysemu_disabled); - - printk("Checking advanced syscall emulation patch for ptrace..."); - pid = start_ptraced_child(&stack); - while(1){ - count++; - if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) - goto fail; - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_ptrace : expected (SIGTRAP|SYSCALL_TRAP), " - "got status = %d", status); - - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, - 0); - if(syscall == __NR_getpid){ - if (!count) - panic("check_ptrace : SYSEMU_SINGLESTEP doesn't singlestep"); - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, - os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); - break; - } - } - if (stop_ptraced_child(pid, stack, 0, 0) < 0) - goto fail_stopped; - - sysemu_supported = 2; - printk("OK\n"); - - if ( !force_sysemu_disabled ) - set_using_sysemu(sysemu_supported); - return; - -fail: - stop_ptraced_child(pid, stack, 1, 0); -fail_stopped: - printk("missing\n"); -} - -void __init check_ptrace(void) -{ - void *stack; - int pid, syscall, n, status; - - printk("Checking that ptrace can change system call numbers..."); - pid = start_ptraced_child(&stack); - - if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) - panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno); - - while(1){ - if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", - errno); - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP + 0x80)) - panic("check_ptrace : expected SIGTRAP + 0x80, " - "got status = %d", status); - - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, - 0); - if(syscall == __NR_getpid){ - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, - __NR_getppid); - if(n < 0) - panic("check_ptrace : failed to modify system " - "call, errno = %d", errno); - break; - } - } - stop_ptraced_child(pid, stack, 0, 1); - printk("OK\n"); - check_sysemu(); -} - -int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) -{ - sigjmp_buf buf; - int n; - - *jmp_ptr = &buf; - n = sigsetjmp(buf, 1); - if(n != 0) - return(n); - (*fn)(arg); - return(0); -} - -void forward_pending_sigio(int target) -{ - sigset_t sigs; - - if(sigpending(&sigs)) - panic("forward_pending_sigio : sigpending failed"); - if(sigismember(&sigs, SIGIO)) - kill(target, SIGIO); -} - -extern void *__syscall_stub_start, __syscall_stub_end; - -#ifdef UML_CONFIG_MODE_SKAS - -static inline void check_skas3_ptrace_support(void) -{ - struct ptrace_faultinfo fi; - void *stack; - int pid, n; - - printf("Checking for the skas3 patch in the host..."); - pid = start_ptraced_child(&stack); - - n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); - if (n < 0) { - ptrace_faultinfo = 0; - if(errno == EIO) - printf("not found\n"); - else { - perror("not found"); - } - } - else { - if (!ptrace_faultinfo) - printf("found but disabled on command line\n"); - else - printf("found\n"); - } - - init_registers(pid); - stop_ptraced_child(pid, stack, 1, 1); -} - -int can_do_skas(void) -{ - printf("Checking for /proc/mm..."); - if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { - proc_mm = 0; - printf("not found\n"); - } else { - if (!proc_mm) - printf("found but disabled on command line\n"); - else - printf("found\n"); - } - - check_skas3_ptrace_support(); - return 1; -} -#else -int can_do_skas(void) -{ - return(0); -} -#endif diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index d296d55..db36c7c 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -4,7 +4,7 @@ # obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ - syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ + syscall.o tlb.o trap_user.o uaccess.o subdir- := util diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h index 278b72f..09536f8 100644 --- a/arch/um/kernel/skas/include/mmu-skas.h +++ b/arch/um/kernel/skas/include/mmu-skas.h @@ -6,11 +6,15 @@ #ifndef __SKAS_MMU_H #define __SKAS_MMU_H +#include "linux/config.h" #include "mm_id.h" struct mmu_context_skas { struct mm_id id; unsigned long last_page_table; +#ifdef CONFIG_3_LEVEL_PGTABLES + unsigned long last_pmd; +#endif }; extern void switch_mm_skas(struct mm_id * mm_idp); diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index d983ea8..0609347 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -24,28 +24,26 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, - int r, int w, int x, int phys_fd, unsigned long long offset); -extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); +extern int map(struct mm_id * mm_idp, unsigned long virt, + unsigned long len, int r, int w, int x, int phys_fd, + unsigned long long offset, int done, void **data); +extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, + int done, void **data); extern int protect(struct mm_id * mm_idp, unsigned long addr, - unsigned long len, int r, int w, int x); + unsigned long len, int r, int w, int x, int done, + void **data); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); -extern int new_mm(int from); +extern int new_mm(int from, unsigned long stack); extern int start_userspace(unsigned long stub_stack); extern int copy_context_skas0(unsigned long stack, int pid); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); +extern long run_syscall_stub(struct mm_id * mm_idp, + int syscall, unsigned long *args, long expected, + void **addr, int done); +extern long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index b0980ff..1d89640 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -5,13 +5,14 @@ #include <signal.h> #include <errno.h> +#include <string.h> #include <sys/mman.h> #include <sys/wait.h> #include <asm/page.h> #include <asm/unistd.h> #include "mem_user.h" #include "mem.h" -#include "mm_id.h" +#include "skas.h" #include "user.h" #include "os.h" #include "proc_mm.h" @@ -23,46 +24,155 @@ #include "uml-config.h" #include "sysdep/ptrace.h" #include "sysdep/stub.h" -#include "skas.h" -extern unsigned long syscall_stub, __syscall_stub_start; +extern unsigned long batch_syscall_stub, __syscall_stub_start; extern void wait_stub_done(int pid, int sig, char * fname); -static long run_syscall_stub(struct mm_id * mm_idp, int syscall, - unsigned long *args) +static inline unsigned long *check_init_stack(struct mm_id * mm_idp, + unsigned long *stack) +{ + if(stack == NULL){ + stack = (unsigned long *) mm_idp->stack + 2; + *stack = 0; + } + return stack; +} + +extern int proc_mm; + +int single_count = 0; +int multi_count = 0; +int multi_op_count = 0; + +static long do_syscall_stub(struct mm_id *mm_idp, void **addr) { + unsigned long regs[MAX_REG_NR]; + unsigned long *data; + unsigned long *syscall; + long ret, offset; int n, pid = mm_idp->u.pid; - unsigned long regs[MAX_REG_NR]; + + if(proc_mm) +#warning Need to look up userspace_pid by cpu + pid = userspace_pid[0]; + + multi_count++; get_safe_registers(regs); regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + - ((unsigned long) &syscall_stub - + ((unsigned long) &batch_syscall_stub - (unsigned long) &__syscall_stub_start); - /* XXX Don't have a define for starting a syscall */ - regs[REGS_SYSCALL_NR] = syscall; - regs[REGS_SYSCALL_ARG1] = args[0]; - regs[REGS_SYSCALL_ARG2] = args[1]; - regs[REGS_SYSCALL_ARG3] = args[2]; - regs[REGS_SYSCALL_ARG4] = args[3]; - regs[REGS_SYSCALL_ARG5] = args[4]; - regs[REGS_SYSCALL_ARG6] = args[5]; - n = ptrace_setregs(pid, regs); - if(n < 0){ - printk("run_syscall_stub : PTRACE_SETREGS failed, " - "errno = %d\n", n); - return(n); + n = ptrace_setregs(pid, regs); + if(n < 0) + panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n", + n); + + wait_stub_done(pid, 0, "do_syscall_stub"); + + /* When the stub stops, we find the following values on the + * beginning of the stack: + * (long )return_value + * (long )offset to failed sycall-data (0, if no error) + */ + ret = *((unsigned long *) mm_idp->stack); + offset = *((unsigned long *) mm_idp->stack + 1); + if (offset) { + data = (unsigned long *)(mm_idp->stack + + offset - UML_CONFIG_STUB_DATA); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk("do_syscall_stub: syscall %ld failed, return value = " + "0x%lx, expected return value = 0x%lx\n", + syscall[0], ret, syscall[7]); + printk(" syscall parameters: " + "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for(n = 1; n < data[0]/sizeof(long); n++) { + if(n == 1) + printk(" additional syscall data:"); + if(n % 4 == 1) + printk("\n "); + printk(" 0x%lx", data[n]); + } + if(n > 1) + printk("\n"); + } + else ret = 0; + + *addr = check_init_stack(mm_idp, NULL); + + return ret; +} + +long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, long expected, void **addr, + int done) +{ + unsigned long *stack = check_init_stack(mm_idp, *addr); + + if(done && *addr == NULL) + single_count++; + + *stack += sizeof(long); + stack += *stack / sizeof(long); + + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack++ = expected; + *stack = 0; + multi_op_count++; + + if(!done && ((((unsigned long) stack) & ~PAGE_MASK) < + PAGE_SIZE - 10 * sizeof(long))){ + *addr = stack; + return 0; } - wait_stub_done(pid, 0, "run_syscall_stub"); + return do_syscall_stub(mm_idp, addr); +} + +long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr) +{ + unsigned long *stack; + int ret = 0; - return(*((unsigned long *) mm_idp->stack)); + /* If *addr still is uninitialized, it *must* contain NULL. + * Thus in this case do_syscall_stub correctly won't be called. + */ + if((((unsigned long) *addr) & ~PAGE_MASK) >= + PAGE_SIZE - (10 + data_count) * sizeof(long)) { + ret = do_syscall_stub(mm_idp, addr); + /* in case of error, don't overwrite data on stack */ + if(ret) + return ret; + } + + stack = check_init_stack(mm_idp, *addr); + *addr = stack; + + *stack = data_count * sizeof(long); + + memcpy(stack + 1, data, data_count * sizeof(long)); + + *stub_addr = (void *)(((unsigned long)(stack + 1) & ~PAGE_MASK) + + UML_CONFIG_STUB_DATA); + + return 0; } -int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, - int r, int w, int x, int phys_fd, unsigned long long offset) +int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset, + int done, void **data) { - int prot, n; + int prot, ret; prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); @@ -70,6 +180,7 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, if(proc_mm){ struct proc_mm_op map; int fd = mm_idp->u.mm_fd; + map = ((struct proc_mm_op) { .op = MM_MMAP, .u = { .mmap = @@ -81,63 +192,61 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, .fd = phys_fd, .offset= offset } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) - printk("map : /proc/mm map failed, err = %d\n", -n); + ret = os_write_file(fd, &map, sizeof(map)); + if(ret != sizeof(map)) + printk("map : /proc/mm map failed, err = %d\n", -ret); + else ret = 0; } else { - long res; unsigned long args[] = { virt, len, prot, MAP_SHARED | MAP_FIXED, phys_fd, MMAP_OFFSET(offset) }; - res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); - if((void *) res == MAP_FAILED) - printk("mmap stub failed, errno = %d\n", res); + ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, + data, done); } - return 0; + return ret; } -int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) +int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done, + void **data) { - int n; + int ret; if(proc_mm){ struct proc_mm_op unmap; int fd = mm_idp->u.mm_fd; + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, .u = { .munmap = { .addr = (unsigned long) addr, .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); - } + ret = os_write_file(fd, &unmap, sizeof(unmap)); + if(ret != sizeof(unmap)) + printk("unmap - proc_mm write returned %d\n", ret); + else ret = 0; } else { - int res; unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_munmap, args); - if(res < 0) - printk("munmap stub failed, errno = %d\n", res); + ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, + data, done); + if(ret < 0) + printk("munmap stub failed, errno = %d\n", ret); } - return(0); + return ret; } -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, - int r, int w, int x) +int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x, int done, void **data) { struct proc_mm_op protect; - int prot, n; + int prot, ret; prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); @@ -152,20 +261,19 @@ int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, .len = len, .prot = prot } } } ); - n = os_write_file(fd, &protect, sizeof(protect)); - if(n != sizeof(protect)) - panic("protect failed, err = %d", -n); + ret = os_write_file(fd, &protect, sizeof(protect)); + if(ret != sizeof(protect)) + printk("protect failed, err = %d", -ret); + else ret = 0; } else { - int res; unsigned long args[] = { addr, len, prot, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_mprotect, args); - if(res < 0) - panic("mprotect stub failed, errno = %d\n", res); + ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, + data, done); } - return(0); + return ret; } void before_mem_skas(unsigned long unused) diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index d232daa..240143b 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -56,6 +56,9 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, */ mm->context.skas.last_page_table = pmd_page_kernel(*pmd); +#ifdef CONFIG_3_LEVEL_PGTABLES + mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud)); +#endif *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); *pte = pte_mkexec(*pte); @@ -77,23 +80,14 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) struct mm_struct *cur_mm = current->mm; struct mm_id *cur_mm_id = &cur_mm->context.skas.id; struct mm_id *mm_id = &mm->context.skas.id; - unsigned long stack; - int from, ret; + unsigned long stack = 0; + int from, ret = -ENOMEM; - if(proc_mm){ - if((cur_mm != NULL) && (cur_mm != &init_mm)) - from = cur_mm->context.skas.id.u.mm_fd; - else from = -1; + if(!proc_mm || !ptrace_faultinfo){ + stack = get_zeroed_page(GFP_KERNEL); + if(stack == 0) + goto out; - ret = new_mm(from); - if(ret < 0){ - printk("init_new_context_skas - new_mm failed, " - "errno = %d\n", ret); - return ret; - } - mm_id->u.mm_fd = ret; - } - else { /* This zeros the entry that pgd_alloc didn't, needed since * we are about to reinitialize it, and want mm.nr_ptes to * be accurate. @@ -103,20 +97,30 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) ret = init_stub_pte(mm, CONFIG_STUB_CODE, (unsigned long) &__syscall_stub_start); if(ret) - goto out; - - ret = -ENOMEM; - stack = get_zeroed_page(GFP_KERNEL); - if(stack == 0) - goto out; - mm_id->stack = stack; + goto out_free; ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); if(ret) goto out_free; mm->nr_ptes--; + } + mm_id->stack = stack; + if(proc_mm){ + if((cur_mm != NULL) && (cur_mm != &init_mm)) + from = cur_mm_id->u.mm_fd; + else from = -1; + + ret = new_mm(from, stack); + if(ret < 0){ + printk("init_new_context_skas - new_mm failed, " + "errno = %d\n", ret); + goto out_free; + } + mm_id->u.mm_fd = ret; + } + else { if((cur_mm != NULL) && (cur_mm != &init_mm)) mm_id->u.pid = copy_context_skas0(stack, cur_mm_id->u.pid); @@ -126,7 +130,8 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) return 0; out_free: - free_page(mm_id->stack); + if(mm_id->stack != 0) + free_page(mm_id->stack); out: return ret; } @@ -137,9 +142,15 @@ void destroy_context_skas(struct mm_struct *mm) if(proc_mm) os_close_file(mmu->id.u.mm_fd); - else { + else os_kill_ptraced_process(mmu->id.u.pid, 1); + + if(!proc_mm || !ptrace_faultinfo){ free_page(mmu->id.stack); - free_page(mmu->last_page_table); + pte_free_kernel((pte_t *) mmu->last_page_table); + dec_page_state(nr_page_table_pages); +#ifdef CONFIG_3_LEVEL_PGTABLES + pmd_free((pmd_t *) mmu->last_pmd); +#endif } } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index f228f8b..5cd0e99 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -138,6 +138,8 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu } extern int __syscall_stub_start; +int stub_code_fd = -1; +__u64 stub_code_offset; static int userspace_tramp(void *stack) { @@ -152,31 +154,31 @@ static int userspace_tramp(void *stack) /* This has a pte, but it can't be mapped in with the usual * tlb_flush mechanism because this is part of that mechanism */ - int fd; - __u64 offset; - - fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), - PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, + stub_code_fd, stub_code_offset); if(addr == MAP_FAILED){ - printk("mapping mmap stub failed, errno = %d\n", + printk("mapping stub code failed, errno = %d\n", errno); exit(1); } if(stack != NULL){ + int fd; + __u64 offset; + fd = phys_mapping(to_phys(stack), &offset); addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, offset); if(addr == MAP_FAILED){ - printk("mapping segfault stack failed, " + printk("mapping stub stack failed, " "errno = %d\n", errno); exit(1); } } } - if(!ptrace_faultinfo && (stack != NULL)){ + if(!ptrace_faultinfo){ unsigned long v = UML_CONFIG_STUB_CODE + (unsigned long) stub_segv_handler - (unsigned long) &__syscall_stub_start; @@ -202,6 +204,10 @@ int start_userspace(unsigned long stub_stack) unsigned long sp; int pid, status, n, flags; + if ( stub_code_fd == -1 ) + stub_code_fd = phys_mapping(to_phys(&__syscall_stub_start), + &stub_code_offset); + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if(stack == MAP_FAILED) @@ -363,6 +369,53 @@ int copy_context_skas0(unsigned long new_stack, int pid) return pid; } +/* + * This is used only, if proc_mm is available, while PTRACE_FAULTINFO + * isn't. Opening /proc/mm creates a new mm_context, which lacks the stub-pages + * Thus, we map them using /proc/mm-fd + */ +void map_stub_pages(int fd, unsigned long code, + unsigned long data, unsigned long stack) +{ + struct proc_mm_op mmop; + int n; + + mmop = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = code, + .len = PAGE_SIZE, + .prot = PROT_EXEC, + .flags = MAP_FIXED | MAP_PRIVATE, + .fd = stub_code_fd, + .offset = stub_code_offset + } } }); + n = os_write_file(fd, &mmop, sizeof(mmop)); + if(n != sizeof(mmop)) + panic("map_stub_pages : /proc/mm map for code failed, " + "err = %d\n", -n); + + if ( stack ) { + __u64 map_offset; + int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); + mmop = ((struct proc_mm_op) + { .op = MM_MMAP, + .u = + { .mmap = + { .addr = data, + .len = PAGE_SIZE, + .prot = PROT_READ | PROT_WRITE, + .flags = MAP_FIXED | MAP_SHARED, + .fd = map_fd, + .offset = map_offset + } } }); + n = os_write_file(fd, &mmop, sizeof(mmop)); + if(n != sizeof(mmop)) + panic("map_stub_pages : /proc/mm map for data failed, " + "err = %d\n", -n); + } +} + void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) { diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index cbabab1..3d1b227 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -129,7 +129,9 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, return(0); } -int new_mm(int from) +extern void map_stub_pages(int fd, unsigned long code, + unsigned long data, unsigned long stack); +int new_mm(int from, unsigned long stack) { struct proc_mm_op copy; int n, fd; @@ -148,6 +150,9 @@ int new_mm(int from) "err = %d\n", -n); } + if(!ptrace_faultinfo) + map_stub_pages(fd, CONFIG_STUB_CODE, CONFIG_STUB_DATA, stack); + return(fd); } diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c new file mode 100644 index 0000000..51fb940 --- /dev/null +++ b/arch/um/kernel/skas/syscall.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/sys.h" +#include "linux/ptrace.h" +#include "asm/errno.h" +#include "asm/unistd.h" +#include "asm/ptrace.h" +#include "asm/current.h" +#include "sysdep/syscalls.h" +#include "kern_util.h" +#include "syscall.h" + +void handle_syscall(union uml_pt_regs *r) +{ + struct pt_regs *regs = container_of(r, struct pt_regs, regs); + long result; + int syscall; +#ifdef UML_CONFIG_SYSCALL_DEBUG + int index; + + index = record_syscall_start(UPT_SYSCALL_NR(r)); +#endif + syscall_trace(r, 0); + + current->thread.nsyscalls++; + nsyscalls++; + + /* This should go in the declaration of syscall, but when I do that, + * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing + * children at all, sometimes hanging when bash doesn't see the first + * ls exit. + * The assembly looks functionally the same to me. This is + * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5) + * in case it's a compiler bug. + */ + syscall = UPT_SYSCALL_NR(r); + if((syscall >= NR_syscalls) || (syscall < 0)) + result = -ENOSYS; + else result = EXECUTE_SYSCALL(syscall, regs); + + REGS_SET_SYSCALL_RETURN(r->skas.regs, result); + + syscall_trace(r, 1); +#ifdef UML_CONFIG_SYSCALL_DEBUG + record_syscall_end(index, result); +#endif +} diff --git a/arch/um/kernel/skas/syscall_kern.c b/arch/um/kernel/skas/syscall_kern.c deleted file mode 100644 index bdf040c..0000000 --- a/arch/um/kernel/skas/syscall_kern.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -#include "linux/sys.h" -#include "linux/ptrace.h" -#include "asm/errno.h" -#include "asm/unistd.h" -#include "asm/ptrace.h" -#include "asm/current.h" -#include "sysdep/syscalls.h" -#include "kern_util.h" - -extern syscall_handler_t *sys_call_table[]; - -long execute_syscall_skas(void *r) -{ - struct pt_regs *regs = r; - long res; - int syscall; - - current->thread.nsyscalls++; - nsyscalls++; - syscall = UPT_SYSCALL_NR(®s->regs); - - if((syscall >= NR_syscalls) || (syscall < 0)) - res = -ENOSYS; - else res = EXECUTE_SYSCALL(syscall, regs); - - return(res); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/syscall_user.c b/arch/um/kernel/skas/syscall_user.c deleted file mode 100644 index 6b06649..0000000 --- a/arch/um/kernel/skas/syscall_user.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdlib.h> -#include <signal.h> -#include "kern_util.h" -#include "uml-config.h" -#include "syscall_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "skas.h" - -void handle_syscall(union uml_pt_regs *regs) -{ - long result; -#ifdef UML_CONFIG_SYSCALL_DEBUG - int index; - - index = record_syscall_start(UPT_SYSCALL_NR(regs)); -#endif - - syscall_trace(regs, 0); - result = execute_syscall_skas(regs); - - REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); - - syscall_trace(regs, 1); -#ifdef UML_CONFIG_SYSCALL_DEBUG - record_syscall_end(index, result); -#endif -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index 6230999..6e84963 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -18,33 +18,39 @@ #include "os.h" #include "tlb.h" -static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) +static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void **flush) { struct host_vm_op *op; - int i; + int i, ret = 0; - for(i = 0; i <= last; i++){ + for(i = 0; i <= last && !ret; i++){ op = &ops[i]; switch(op->type){ case MMAP: - map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, - op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, - op->u.mmap.fd, op->u.mmap.offset); + ret = map(&mmu->skas.id, op->u.mmap.addr, + op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, + op->u.mmap.x, op->u.mmap.fd, + op->u.mmap.offset, finished, flush); break; case MUNMAP: - unmap(&mmu->skas.id, (void *) op->u.munmap.addr, - op->u.munmap.len); + ret = unmap(&mmu->skas.id, + (void *) op->u.munmap.addr, + op->u.munmap.len, finished, flush); break; case MPROTECT: - protect(&mmu->skas.id, op->u.mprotect.addr, - op->u.mprotect.len, op->u.mprotect.r, - op->u.mprotect.w, op->u.mprotect.x); + ret = protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x, + finished, flush); break; default: printk("Unknown op type %d in do_ops\n", op->type); break; } } + + return ret; } extern int proc_mm; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c new file mode 100644 index 0000000..1429c13 --- /dev/null +++ b/arch/um/kernel/syscall.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "kern_util.h" +#include "syscall.h" +#include "os.h" + +struct { + int syscall; + int pid; + long result; + unsigned long long start; + unsigned long long end; +} syscall_record[1024]; + +int record_syscall_start(int syscall) +{ + int max, index; + + max = sizeof(syscall_record)/sizeof(syscall_record[0]); + index = next_syscall_index(max); + + syscall_record[index].syscall = syscall; + syscall_record[index].pid = current_pid(); + syscall_record[index].result = 0xdeadbeef; + syscall_record[index].start = os_usecs(); + return(index); +} + +void record_syscall_end(int index, long result) +{ + syscall_record[index].result = result; + syscall_record[index].end = os_usecs(); +} diff --git a/arch/um/kernel/syscall_user.c b/arch/um/kernel/syscall_user.c deleted file mode 100644 index 01b711e..0000000 --- a/arch/um/kernel/syscall_user.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdlib.h> -#include <sys/time.h> -#include "kern_util.h" -#include "syscall_user.h" - -struct { - int syscall; - int pid; - long result; - struct timeval start; - struct timeval end; -} syscall_record[1024]; - -int record_syscall_start(int syscall) -{ - int max, index; - - max = sizeof(syscall_record)/sizeof(syscall_record[0]); - index = next_syscall_index(max); - - syscall_record[index].syscall = syscall; - syscall_record[index].pid = current_pid(); - syscall_record[index].result = 0xdeadbeef; - gettimeofday(&syscall_record[index].start, NULL); - return(index); -} - -void record_syscall_end(int index, long result) -{ - syscall_record[index].result = result; - gettimeofday(&syscall_record[index].end, NULL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 83ec8d47..80ed618 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -15,12 +15,118 @@ #include "mem_user.h" #include "os.h" +static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x, struct host_vm_op *ops, int *index, + int last_filled, union mm_context *mmu, void **flush, + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) +{ + __u64 offset; + struct host_vm_op *last; + int fd, ret = 0; + + fd = phys_mapping(phys, &offset); + if(*index != -1){ + last = &ops[*index]; + if((last->type == MMAP) && + (last->u.mmap.addr + last->u.mmap.len == virt) && + (last->u.mmap.r == r) && (last->u.mmap.w == w) && + (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && + (last->u.mmap.offset + last->u.mmap.len == offset)){ + last->u.mmap.len += len; + return 0; + } + } + + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; + } + + ops[++*index] = ((struct host_vm_op) { .type = MMAP, + .u = { .mmap = { + .addr = virt, + .len = len, + .r = r, + .w = w, + .x = x, + .fd = fd, + .offset = offset } + } }); + return ret; +} + +static int add_munmap(unsigned long addr, unsigned long len, + struct host_vm_op *ops, int *index, int last_filled, + union mm_context *mmu, void **flush, + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) +{ + struct host_vm_op *last; + int ret = 0; + + if(*index != -1){ + last = &ops[*index]; + if((last->type == MUNMAP) && + (last->u.munmap.addr + last->u.mmap.len == addr)){ + last->u.munmap.len += len; + return 0; + } + } + + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; + } + + ops[++*index] = ((struct host_vm_op) { .type = MUNMAP, + .u = { .munmap = { + .addr = addr, + .len = len } } }); + return ret; +} + +static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, + int x, struct host_vm_op *ops, int *index, + int last_filled, union mm_context *mmu, void **flush, + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) +{ + struct host_vm_op *last; + int ret = 0; + + if(*index != -1){ + last = &ops[*index]; + if((last->type == MPROTECT) && + (last->u.mprotect.addr + last->u.mprotect.len == addr) && + (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && + (last->u.mprotect.x == x)){ + last->u.mprotect.len += len; + return 0; + } + } + + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; + } + + ops[++*index] = ((struct host_vm_op) { .type = MPROTECT, + .u = { .mprotect = { + .addr = addr, + .len = len, + .r = r, + .w = w, + .x = x } } }); + return ret; +} + #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)) + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) { pgd_t *npgd; pud_t *npud; @@ -29,21 +135,24 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; - struct host_vm_op ops[16]; + struct host_vm_op ops[1]; + void *flush = NULL; int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; + int ret = 0; if(mm == NULL) return; - for(addr = start_addr; addr < end_addr;){ + ops[0].type = NONE; + for(addr = start_addr; addr < end_addr && !ret;){ npgd = pgd_offset(mm, addr); if(!pgd_present(*npgd)){ end = ADD_ROUND(addr, PGDIR_SIZE); if(end > end_addr) end = end_addr; if(force || pgd_newpage(*npgd)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, mmu, - do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pgd_mkuptodate(*npgd); } addr = end; @@ -56,9 +165,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(end > end_addr) end = end_addr; if(force || pud_newpage(*npud)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, mmu, - do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pud_mkuptodate(*npud); } addr = end; @@ -71,9 +180,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(end > end_addr) end = end_addr; if(force || pmd_newpage(*npmd)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, mmu, - do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pmd_mkuptodate(*npmd); } addr = end; @@ -92,24 +201,32 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, } if(force || pte_newpage(*npte)){ if(pte_present(*npte)) - op_index = add_mmap(addr, - pte_val(*npte) & PAGE_MASK, - PAGE_SIZE, r, w, x, ops, - op_index, last_op, mmu, - do_ops); - else op_index = add_munmap(addr, PAGE_SIZE, ops, - op_index, last_op, mmu, - do_ops); + ret = add_mmap(addr, + pte_val(*npte) & PAGE_MASK, + PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); + else ret = add_munmap(addr, PAGE_SIZE, ops, + &op_index, last_op, mmu, + &flush, do_ops); } else if(pte_newprot(*npte)) - op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, - op_index, last_op, mmu, - do_ops); + ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(mmu, ops, op_index); + + if(!ret) + ret = (*do_ops)(mmu, ops, op_index, 1, &flush); + + /* This is not an else because ret is modified above */ + if(ret) { + printk("fix_range_common: failed, killing current process\n"); + force_sig(SIGKILL, current); + } } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -226,106 +343,6 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) return(pte_offset_map(pmd, addr)); } -int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - __u64 offset; - struct host_vm_op *last; - int fd; - - fd = phys_mapping(phys, &offset); - if(index != -1){ - last = &ops[index]; - if((last->type == MMAP) && - (last->u.mmap.addr + last->u.mmap.len == virt) && - (last->u.mmap.r == r) && (last->u.mmap.w == w) && - (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && - (last->u.mmap.offset + last->u.mmap.len == offset)){ - last->u.mmap.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MMAP, - .u = { .mmap = { - .addr = virt, - .len = len, - .r = r, - .w = w, - .x = x, - .fd = fd, - .offset = offset } - } }); - return(index); -} - -int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - struct host_vm_op *last; - - if(index != -1){ - last = &ops[index]; - if((last->type == MUNMAP) && - (last->u.munmap.addr + last->u.mmap.len == addr)){ - last->u.munmap.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MUNMAP, - .u = { .munmap = { - .addr = addr, - .len = len } } }); - return(index); -} - -int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, - union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - struct host_vm_op *last; - - if(index != -1){ - last = &ops[index]; - if((last->type == MPROTECT) && - (last->u.mprotect.addr + last->u.mprotect.len == addr) && - (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && - (last->u.mprotect.x == x)){ - last->u.mprotect.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MPROTECT, - .u = { .mprotect = { - .addr = addr, - .len = len, - .r = r, - .w = w, - .x = x } } }); - return(index); -} - void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) { address &= PAGE_MASK; diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index c20aef1..b5fc89f 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -26,6 +26,7 @@ #include "mem.h" #include "mem_kern.h" +/* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). */ int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out) { @@ -35,7 +36,6 @@ int handle_page_fault(unsigned long address, unsigned long ip, pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned long page; int err = -EFAULT; *code_out = SEGV_MAPERR; @@ -52,7 +52,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, else if(expand_stack(vma, address)) goto out; - good_area: +good_area: *code_out = SEGV_ACCERR; if(is_write && !(vma->vm_flags & VM_WRITE)) goto out; @@ -60,9 +60,8 @@ int handle_page_fault(unsigned long address, unsigned long ip, if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out; - page = address & PAGE_MASK; do { - survive: +survive: switch (handle_mm_fault(mm, vma, address, is_write)){ case VM_FAULT_MINOR: current->min_flt++; @@ -79,16 +78,16 @@ int handle_page_fault(unsigned long address, unsigned long ip, default: BUG(); } - pgd = pgd_offset(mm, page); - pud = pud_offset(pgd, page); - pmd = pmd_offset(pud, page); - pte = pte_offset_kernel(pmd, page); + pgd = pgd_offset(mm, address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); } while(!pte_present(*pte)); err = 0; *pte = pte_mkyoung(*pte); if(pte_write(*pte)) *pte = pte_mkdirty(*pte); - flush_tlb_page(vma, page); - out: + flush_tlb_page(vma, address); +out: up_read(&mm->mmap_sem); return(err); @@ -144,19 +143,18 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", address, ip); - if(err == -EACCES){ + if (err == -EACCES) { si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRERR; si.si_addr = (void *)address; current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); - } - else if(err == -ENOMEM){ + } else if (err == -ENOMEM) { printk("VM: killing process %s\n", current->comm); do_exit(SIGKILL); - } - else { + } else { + BUG_ON(err != -EFAULT); si.si_signo = SIGSEGV; si.si_addr = (void *) address; current->thread.arch.faultinfo = fi; @@ -200,30 +198,3 @@ void winch(int sig, union uml_pt_regs *regs) void trap_init(void) { } - -DEFINE_SPINLOCK(trap_lock); - -static int trap_index = 0; - -int next_trap_index(int limit) -{ - int ret; - - spin_lock(&trap_lock); - ret = trap_index; - if(++trap_index == limit) - trap_index = 0; - spin_unlock(&trap_lock); - return(ret); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c index f825a6e..e9ccd6b 100644 --- a/arch/um/kernel/trap_user.c +++ b/arch/um/kernel/trap_user.c @@ -40,35 +40,14 @@ void kill_child_dead(int pid) } while(1); } -/* Unlocked - don't care if this is a bit off */ -int nsegfaults = 0; - -struct { - unsigned long address; - int is_write; - int pid; - unsigned long sp; - int is_user; -} segfault_record[1024]; - void segv_handler(int sig, union uml_pt_regs *regs) { - int index, max; struct faultinfo * fi = UPT_FAULTINFO(regs); if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ bad_segv(*fi, UPT_IP(regs)); return; } - max = sizeof(segfault_record)/sizeof(segfault_record[0]); - index = next_trap_index(max); - - nsegfaults++; - segfault_record[index].address = FAULT_ADDRESS(*fi); - segfault_record[index].pid = os_getpid(); - segfault_record[index].is_write = FAULT_WRITE(*fi); - segfault_record[index].sp = UPT_SP(regs); - segfault_record[index].is_user = UPT_IS_USER(regs); segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); } diff --git a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c index 2650a62..3d29c90 100644 --- a/arch/um/kernel/tt/syscall_kern.c +++ b/arch/um/kernel/tt/syscall_kern.c @@ -12,36 +12,41 @@ #include "asm/uaccess.h" #include "asm/stat.h" #include "sysdep/syscalls.h" +#include "sysdep/sigcontext.h" #include "kern_util.h" +#include "syscall.h" -extern syscall_handler_t *sys_call_table[]; - -long execute_syscall_tt(void *r) +void syscall_handler_tt(int sig, struct pt_regs *regs) { - struct pt_regs *regs = r; - long res; + void *sc; + long result; int syscall; - #ifdef CONFIG_SYSCALL_DEBUG + int index; + index = record_syscall_start(syscall); +#endif + sc = UPT_SC(®s->regs); + SC_START_SYSCALL(sc); + + syscall_trace(®s->regs, 0); + current->thread.nsyscalls++; nsyscalls++; -#endif syscall = UPT_SYSCALL_NR(®s->regs); if((syscall >= NR_syscalls) || (syscall < 0)) - res = -ENOSYS; - else res = EXECUTE_SYSCALL(syscall, regs); + result = -ENOSYS; + else result = EXECUTE_SYSCALL(syscall, regs); - return(res); -} + /* regs->sc may have changed while the system call ran (there may + * have been an interrupt or segfault), so it needs to be refreshed. + */ + UPT_SC(®s->regs) = sc; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + SC_SET_SYSCALL_RETURN(sc, result); + + syscall_trace(®s->regs, 1); +#ifdef CONFIG_SYSCALL_DEBUG + record_syscall_end(index, result); +#endif +} diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c index b218316..902987b 100644 --- a/arch/um/kernel/tt/syscall_user.c +++ b/arch/um/kernel/tt/syscall_user.c @@ -13,42 +13,9 @@ #include "task.h" #include "user_util.h" #include "kern_util.h" -#include "syscall_user.h" +#include "syscall.h" #include "tt.h" - -void syscall_handler_tt(int sig, union uml_pt_regs *regs) -{ - void *sc; - long result; - int syscall; -#ifdef UML_CONFIG_DEBUG_SYSCALL - int index; -#endif - - syscall = UPT_SYSCALL_NR(regs); - sc = UPT_SC(regs); - SC_START_SYSCALL(sc); - -#ifdef UML_CONFIG_DEBUG_SYSCALL - index = record_syscall_start(syscall); -#endif - syscall_trace(regs, 0); - result = execute_syscall_tt(regs); - - /* regs->sc may have changed while the system call ran (there may - * have been an interrupt or segfault), so it needs to be refreshed. - */ - UPT_SC(regs) = sc; - - SC_SET_SYSCALL_RETURN(sc, result); - - syscall_trace(regs, 1); -#ifdef UML_CONFIG_DEBUG_SYSCALL - record_syscall_end(index, result); -#endif -} - void do_sigtrap(void *task) { UPT_SYSCALL_NR(TASK_REGS(task)) = -1; diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 2eefb43..f1d85db 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,25 +17,31 @@ #include "os.h" #include "tlb.h" -static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) +static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void **flush) { struct host_vm_op *op; - int i; + int i, ret=0; - for(i = 0; i <= last; i++){ + for(i = 0; i <= last && !ret; i++){ op = &ops[i]; switch(op->type){ case MMAP: - os_map_memory((void *) op->u.mmap.addr, op->u.mmap.fd, - op->u.mmap.offset, op->u.mmap.len, - op->u.mmap.r, op->u.mmap.w, - op->u.mmap.x); + ret = os_map_memory((void *) op->u.mmap.addr, + op->u.mmap.fd, op->u.mmap.offset, + op->u.mmap.len, op->u.mmap.r, + op->u.mmap.w, op->u.mmap.x); break; case MUNMAP: - os_unmap_memory((void *) op->u.munmap.addr, - op->u.munmap.len); + ret = os_unmap_memory((void *) op->u.munmap.addr, + op->u.munmap.len); break; case MPROTECT: + ret = protect_memory(op->u.mprotect.addr, + op->u.munmap.len, + op->u.mprotect.r, + op->u.mprotect.w, + op->u.mprotect.x, 1); protect_memory(op->u.mprotect.addr, op->u.munmap.len, op->u.mprotect.r, op->u.mprotect.w, op->u.mprotect.x, 1); @@ -45,6 +51,8 @@ static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) break; } } + + return ret; } static void fix_range(struct mm_struct *mm, unsigned long start_addr, diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index ca2bb6f..09f6f7c 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -126,7 +126,7 @@ unsigned long start_vm; unsigned long end_vm; int ncpus = 1; -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST /* Pointer set in linux_main, the array itself is private to each thread, * and changed at address space creation time so this poses no concurrency * problems. @@ -141,7 +141,7 @@ long physmem_size = 32 * 1024 * 1024; void set_cmdline(char *cmd) { -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST char *umid, *ptr; if(CHOOSE_MODE(honeypot, 0)) return; @@ -333,6 +333,7 @@ int linux_main(int argc, char **argv) if(have_root == 0) add_arg(DEFAULT_COMMAND_LINE); + os_early_checks(); mode_tt = force_tt ? 1 : !can_do_skas(); #ifndef CONFIG_MODE_TT if (mode_tt) { @@ -385,7 +386,7 @@ int linux_main(int argc, char **argv) setup_machinename(system_utsname.machine); -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST argv1_begin = argv[1]; argv1_end = &argv[1][strlen(argv[1])]; #endif @@ -470,7 +471,6 @@ void __init setup_arch(char **cmdline_p) void __init check_bugs(void) { arch_check_bugs(); - check_ptrace(); check_sigio(); check_devanon(); } |