diff options
author | Richard Weinberger <richard@nod.at> | 2011-07-25 17:12:54 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-25 20:57:13 -0700 |
commit | f1c2bb8b9964ed31de988910f8b1cfb586d30091 (patch) | |
tree | ccf58acd47dbf659dca3087b7ebf2704ffd14aba | |
parent | fc9a00187ba1300a0baae8e613cc62598e1a7de7 (diff) | |
download | op-kernel-dev-f1c2bb8b9964ed31de988910f8b1cfb586d30091.zip op-kernel-dev-f1c2bb8b9964ed31de988910f8b1cfb586d30091.tar.gz |
um: implement a x86_64 vDSO
Until now UML had no x86_64 vDSO. So glibc always used the vsyscall page
for gettimeday() and friends. Calls to gettimeday() returned falsely the
host time and confused some programs.
This patch adds a vDSO which turns all __vdso_* calls into a system call
so that UML can trap them.
As glibc still uses the vsyscall page for static binaries this patch
improves the situation only for dynamic binaries.
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/um/sys-x86_64/Makefile | 2 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/Makefile | 90 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/checkundef.sh | 10 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/um_vdso.c | 71 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/vdso-layout.lds.S | 64 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/vdso-note.S | 12 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/vdso.S | 10 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/vdso.lds.S | 32 | ||||
-rw-r--r-- | arch/um/sys-x86_64/vdso/vma.c | 74 |
9 files changed, 365 insertions, 0 deletions
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index bf2fe83..bd4d1d3 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -8,6 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o mem.o \ setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ sysrq.o ksyms.o tls.o +obj-y += vdso/ + subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \ lib/rwsem.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o diff --git a/arch/um/sys-x86_64/vdso/Makefile b/arch/um/sys-x86_64/vdso/Makefile new file mode 100644 index 0000000..5dffe6d --- /dev/null +++ b/arch/um/sys-x86_64/vdso/Makefile @@ -0,0 +1,90 @@ +# +# Building vDSO images for x86. +# + +VDSO64-y := y + +vdso-install-$(VDSO64-y) += vdso.so + + +# files to link into the vdso +vobjs-y := vdso-note.o um_vdso.o + +# files to link into kernel +obj-$(VDSO64-y) += vdso.o vma.o + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) + +export CPPFLAGS_vdso.lds += -P -C + +VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ + -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 + +$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so + +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso) + +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ + -fno-omit-frame-pointer -foptimize-sibling-calls + +$(vobjs): KBUILD_CFLAGS += $(CFL) + +# +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +# +CFLAGS_REMOVE_vdso-note.o = -pg +CFLAGS_REMOVE_um_vdso.o = -pg + +targets += vdso-syms.lds +obj-$(VDSO64-y) += vdso-syms.lds + +# +# Match symbols in the DSO that look like VDSO*; produce a file of constants. +# +sed-vdsosym := -e 's/^00*/0/' \ + -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' +quiet_cmd_vdsosym = VDSOSYM $@ +define cmd_vdsosym + $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ +endef + +$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE + $(call if_changed,vdsosym) + +# +# The DSO images are built using a special linker script. +# +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(CC) -nostdlib -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ + -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ + sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' + +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +GCOV_PROFILE := n + +# +# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). +# +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ +$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +PHONY += vdso_install $(vdso-install-y) +vdso_install: $(vdso-install-y) diff --git a/arch/um/sys-x86_64/vdso/checkundef.sh b/arch/um/sys-x86_64/vdso/checkundef.sh new file mode 100644 index 0000000..7ee90a9 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/checkundef.sh @@ -0,0 +1,10 @@ +#!/bin/sh +nm="$1" +file="$2" +$nm "$file" | grep '^ *U' > /dev/null 2>&1 +if [ $? -eq 1 ]; then + exit 0 +else + echo "$file: undefined symbols found" >&2 + exit 1 +fi diff --git a/arch/um/sys-x86_64/vdso/um_vdso.c b/arch/um/sys-x86_64/vdso/um_vdso.c new file mode 100644 index 0000000..7c441b5 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/um_vdso.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This vDSO turns all calls into a syscall so that UML can trap them. + */ + + +/* Disable profiling for userspace code */ +#define DISABLE_BRANCH_PROFILING + +#include <linux/time.h> +#include <linux/getcpu.h> +#include <asm/unistd.h> + +int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + + return ret; +} +int clock_gettime(clockid_t, struct timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + + return ret; +} +int gettimeofday(struct timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); + +time_t __vdso_time(time_t *t) +{ + long secs; + + asm volatile("syscall" + : "=a" (secs) + : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); + + return secs; +} +int time(time_t *t) __attribute__((weak, alias("__vdso_time"))); + +long +__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + /* + * UML does not support SMP, we can cheat here. :) + */ + + if (cpu) + *cpu = 0; + if (node) + *node = 0; + + return 0; +} + +long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) + __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/um/sys-x86_64/vdso/vdso-layout.lds.S b/arch/um/sys-x86_64/vdso/vdso-layout.lds.S new file mode 100644 index 0000000..634a2cf --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso-layout.lds.S @@ -0,0 +1,64 @@ +/* + * Linker script for vDSO. This is an ELF shared object prelinked to + * its virtual address, and with only one read-only segment. + * This script controls its layout. + */ + +SECTIONS +{ + . = VDSO_PRELINK + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + + /* + * Align the actual code well away from the non-instruction data. + * This is the best thing for the I-cache. + */ + . = ALIGN(0x100); + + .text : { *(.text*) } :text =0x90909090 +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/arch/um/sys-x86_64/vdso/vdso-note.S b/arch/um/sys-x86_64/vdso/vdso-note.S new file mode 100644 index 0000000..79a071e --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/um/sys-x86_64/vdso/vdso.S b/arch/um/sys-x86_64/vdso/vdso.S new file mode 100644 index 0000000..ec82c16 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso.S @@ -0,0 +1,10 @@ +#include <linux/init.h> + +__INITDATA + + .globl vdso_start, vdso_end +vdso_start: + .incbin "arch/um/sys-x86_64/vdso/vdso.so" +vdso_end: + +__FINIT diff --git a/arch/um/sys-x86_64/vdso/vdso.lds.S b/arch/um/sys-x86_64/vdso/vdso.lds.S new file mode 100644 index 0000000..b96b267 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso.lds.S @@ -0,0 +1,32 @@ +/* + * Linker script for 64-bit vDSO. + * We #include the file to define the layout details. + * Here we only choose the prelinked virtual address. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. We can define local symbols here called VDSO* to make their + * values visible using the asm-x86/vdso.h macros from the kernel proper. + */ + +#define VDSO_PRELINK 0xffffffffff700000 +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + time; + __vdso_time; + local: *; + }; +} + +VDSO64_PRELINK = VDSO_PRELINK; diff --git a/arch/um/sys-x86_64/vdso/vma.c b/arch/um/sys-x86_64/vdso/vma.c new file mode 100644 index 0000000..9495c8d --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vma.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <linux/init.h> + +unsigned int __read_mostly vdso_enabled = 1; +unsigned long um_vdso_addr; + +extern unsigned long task_size; +extern char vdso_start[], vdso_end[]; + +static struct page **vdsop; + +static int __init init_vdso(void) +{ + struct page *um_vdso; + + BUG_ON(vdso_end - vdso_start > PAGE_SIZE); + + um_vdso_addr = task_size - PAGE_SIZE; + + vdsop = kmalloc(GFP_KERNEL, sizeof(struct page *)); + if (!vdsop) + goto oom; + + um_vdso = alloc_page(GFP_KERNEL); + if (!um_vdso) { + kfree(vdsop); + + goto oom; + } + + copy_page(page_address(um_vdso), vdso_start); + *vdsop = um_vdso; + + return 0; + +oom: + printk(KERN_ERR "Cannot allocate vdso\n"); + vdso_enabled = 0; + + return -ENOMEM; +} +subsys_initcall(init_vdso); + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + int err; + struct mm_struct *mm = current->mm; + + if (!vdso_enabled) + return 0; + + down_write(&mm->mmap_sem); + + err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + vdsop); + + up_write(&mm->mmap_sem); + + return err; +} |