From 52fdd08903a1d1162e184114837e232640191627 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sat, 3 Sep 2005 15:56:52 -0700 Subject: [PATCH] unify x86/x86-64 semaphore code This patch moves the common code in x86 and x86-64's semaphore.c into a single file in lib/semaphore-sleepers.c. The arch specific asm stubs are left in the arch tree (in semaphore.c for i386 and in the asm for x86-64). There should be no changes in code/functionality with this patch. Signed-off-by: Benjamin LaHaise Cc: Andi Kleen Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig_i386 | 4 ++++ arch/um/Kconfig_x86_64 | 4 ++++ arch/um/sys-x86_64/Makefile | 5 ++--- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/um') diff --git a/arch/um/Kconfig_i386 b/arch/um/Kconfig_i386 index 27c18a8..8ad156a 100644 --- a/arch/um/Kconfig_i386 +++ b/arch/um/Kconfig_i386 @@ -6,6 +6,10 @@ config 64BIT bool default n +config SEMAPHORE_SLEEPERS + bool + default y + config TOP_ADDR hex default 0xc0000000 if !HOST_2G_2G diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64 index 735a047..6e5357c 100644 --- a/arch/um/Kconfig_x86_64 +++ b/arch/um/Kconfig_x86_64 @@ -6,6 +6,10 @@ config 64BIT bool default y +config SEMAPHORE_SLEEPERS + bool + default y + config TOP_ADDR hex default 0x80000000 diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 7488206..736d3ed 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -6,7 +6,7 @@ #XXX: why into lib-y? lib-y = bitops.o bugs.o csum-partial.o delay.o fault.o mem.o memcpy.o \ - ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o stub.o \ + ptrace.o ptrace_user.o sigcontext.o signal.o stub.o \ stub_segv.o syscalls.o syscall_table.o sysrq.o thunk.o obj-y := ksyms.o @@ -15,7 +15,7 @@ obj-$(CONFIG_MODULES) += module.o um_module.o USER_OBJS := ptrace_user.o sigcontext.o SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \ - semaphore.c thunk.S module.c + thunk.S module.c include arch/um/scripts/Makefile.rules @@ -24,7 +24,6 @@ csum-copy.S-dir = lib csum-partial.c-dir = lib csum-wrappers.c-dir = lib memcpy.S-dir = lib -semaphore.c-dir = kernel thunk.S-dir = lib module.c-dir = kernel -- cgit v1.1 From 96e59245e1abf3ea2e98c4b9ee2ebd975db653db Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:12 -0700 Subject: [PATCH] uml: remove debugging code from page fault path This eliminates the segfault info ring buffer, which added a system call to each page fault, and which hadn't been useful for debugging in ages. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/trap_kern.c | 27 --------------------------- arch/um/kernel/trap_user.c | 21 --------------------- 2 files changed, 48 deletions(-) (limited to 'arch/um') diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index c20aef1..bef8abd 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -200,30 +200,3 @@ void winch(int sig, union uml_pt_regs *regs) void trap_init(void) { } - -DEFINE_SPINLOCK(trap_lock); - -static int trap_index = 0; - -int next_trap_index(int limit) -{ - int ret; - - spin_lock(&trap_lock); - ret = trap_index; - if(++trap_index == limit) - trap_index = 0; - spin_unlock(&trap_lock); - return(ret); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c index f825a6e..e9ccd6b 100644 --- a/arch/um/kernel/trap_user.c +++ b/arch/um/kernel/trap_user.c @@ -40,35 +40,14 @@ void kill_child_dead(int pid) } while(1); } -/* Unlocked - don't care if this is a bit off */ -int nsegfaults = 0; - -struct { - unsigned long address; - int is_write; - int pid; - unsigned long sp; - int is_user; -} segfault_record[1024]; - void segv_handler(int sig, union uml_pt_regs *regs) { - int index, max; struct faultinfo * fi = UPT_FAULTINFO(regs); if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ bad_segv(*fi, UPT_IP(regs)); return; } - max = sizeof(segfault_record)/sizeof(segfault_record[0]); - index = next_trap_index(max); - - nsegfaults++; - segfault_record[index].address = FAULT_ADDRESS(*fi); - segfault_record[index].pid = os_getpid(); - segfault_record[index].is_write = FAULT_WRITE(*fi); - segfault_record[index].sp = UPT_SP(regs); - segfault_record[index].is_user = UPT_IS_USER(regs); segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); } -- cgit v1.1 From 08b178ebf37bbfb78329e0ae6ea688b103d205bf Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:12 -0700 Subject: [PATCH] uml: Rename Kconfig files to be like the other arches To the extent that sub-Kconfig files exist elsewhere in the tree, they are named Kconfig.foo, rather than the Kconfig_foo that UML has. This patch brings the names in line with the rest of the tree. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig | 8 +- arch/um/Kconfig.char | 214 +++++++++++++++++++++++++++++++++++++++++++++++++ arch/um/Kconfig.i386 | 44 ++++++++++ arch/um/Kconfig.net | 180 +++++++++++++++++++++++++++++++++++++++++ arch/um/Kconfig.scsi | 58 ++++++++++++++ arch/um/Kconfig.x86_64 | 39 +++++++++ arch/um/Kconfig_char | 214 ------------------------------------------------- arch/um/Kconfig_i386 | 44 ---------- arch/um/Kconfig_net | 180 ----------------------------------------- arch/um/Kconfig_scsi | 58 -------------- arch/um/Kconfig_x86_64 | 39 --------- arch/um/Makefile | 8 +- 12 files changed, 543 insertions(+), 543 deletions(-) create mode 100644 arch/um/Kconfig.char create mode 100644 arch/um/Kconfig.i386 create mode 100644 arch/um/Kconfig.net create mode 100644 arch/um/Kconfig.scsi create mode 100644 arch/um/Kconfig.x86_64 delete mode 100644 arch/um/Kconfig_char delete mode 100644 arch/um/Kconfig_i386 delete mode 100644 arch/um/Kconfig_net delete mode 100644 arch/um/Kconfig_scsi delete mode 100644 arch/um/Kconfig_x86_64 (limited to 'arch/um') diff --git a/arch/um/Kconfig b/arch/um/Kconfig index f945444..d5e22f0 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -73,7 +73,7 @@ config MODE_SKAS to CONFIG_MODE_TT). Otherwise, it is safe to say Y. Disabling this option will shrink the UML binary slightly. -source "arch/um/Kconfig_arch" +source "arch/um/Kconfig.arch" source "mm/Kconfig" config LD_SCRIPT_STATIC @@ -279,7 +279,7 @@ source "net/Kconfig" source "drivers/base/Kconfig" -source "arch/um/Kconfig_char" +source "arch/um/Kconfig.char" source "drivers/block/Kconfig" @@ -287,7 +287,7 @@ config NETDEVICES bool default NET -source "arch/um/Kconfig_net" +source "arch/um/Kconfig.net" source "drivers/net/Kconfig" @@ -311,7 +311,7 @@ config GENERIC_ISA_DMA depends on SCSI default y -source "arch/um/Kconfig_scsi" +source "arch/um/Kconfig.scsi" endmenu diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char new file mode 100644 index 0000000..62d87b7 --- /dev/null +++ b/arch/um/Kconfig.char @@ -0,0 +1,214 @@ + +menu "Character Devices" + +config STDERR_CONSOLE + bool "stderr console" + default y + help + console driver which dumps all printk messages to stderr. + +config STDIO_CONSOLE + bool + default y + +config SSL + bool "Virtual serial line" + help + The User-Mode Linux environment allows you to create virtual serial + lines on the UML that are usually made to show up on the host as + ttys or ptys. + + See for more + information and command line examples of how to use this facility. + + Unless you have a specific reason for disabling this, say Y. + +config NULL_CHAN + bool "null channel support" + help + This option enables support for attaching UML consoles and serial + lines to a device similar to /dev/null. Data written to it disappears + and there is never any data to be read. + +config PORT_CHAN + bool "port channel support" + help + This option enables support for attaching UML consoles and serial + lines to host portals. They may be accessed with 'telnet + '. Any number of consoles and serial lines may be + attached to a single portal, although what UML device you get when + you telnet to that portal will be unpredictable. + It is safe to say 'Y' here. + +config PTY_CHAN + bool "pty channel support" + help + This option enables support for attaching UML consoles and serial + lines to host pseudo-terminals. Access to both traditional + pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled + with this option. The assignment of UML devices to host devices + will be announced in the kernel message log. + It is safe to say 'Y' here. + +config TTY_CHAN + bool "tty channel support" + help + This option enables support for attaching UML consoles and serial + lines to host terminals. Access to both virtual consoles + (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and + /dev/pts/*) are controlled by this option. + It is safe to say 'Y' here. + +config XTERM_CHAN + bool "xterm channel support" + help + This option enables support for attaching UML consoles and serial + lines to xterms. Each UML device so assigned will be brought up in + its own xterm. + If you disable this option, then CONFIG_PT_PROXY will be disabled as + well, since UML's gdb currently requires an xterm. + It is safe to say 'Y' here. + +config NOCONFIG_CHAN + bool + default !(XTERM_CHAN && TTY_CHAN && PTY_CHAN && PORT_CHAN && NULL_CHAN) + +config CON_ZERO_CHAN + string "Default main console channel initialization" + default "fd:0,fd:1" + help + This is the string describing the channel to which the main console + will be attached by default. This value can be overridden from the + command line. The default value is "fd:0,fd:1", which attaches the + main console to stdin and stdout. + It is safe to leave this unchanged. + +config CON_CHAN + string "Default console channel initialization" + default "xterm" + help + This is the string describing the channel to which all consoles + except the main console will be attached by default. This value can + be overridden from the command line. The default value is "xterm", + which brings them up in xterms. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have X or xterm available. + +config SSL_CHAN + string "Default serial line channel initialization" + default "pty" + help + This is the string describing the channel to which the serial lines + will be attached by default. This value can be overridden from the + command line. The default value is "pty", which attaches them to + traditional pseudo-terminals. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have a set of /dev/pty* devices. + +config UNIX98_PTYS + bool "Unix98 PTY support" + ---help--- + A pseudo terminal (PTY) is a software device consisting of two + halves: a master and a slave. The slave device behaves identical to + a physical terminal; the master device is used by a process to + read data from and write data to the slave, thereby emulating a + terminal. Typical programs for the master side are telnet servers + and xterms. + + Linux has traditionally used the BSD-like names /dev/ptyxx for + masters and /dev/ttyxx for slaves of pseudo terminals. This scheme + has a number of problems. The GNU C library glibc 2.1 and later, + however, supports the Unix98 naming standard: in order to acquire a + pseudo terminal, a process opens /dev/ptmx; the number of the pseudo + terminal is then made available to the process and the pseudo + terminal slave can be accessed as /dev/pts/. What was + traditionally /dev/ttyp2 will then be /dev/pts/2, for example. + + All modern Linux systems use the Unix98 ptys. Say Y unless + you're on an embedded system and want to conserve memory. + +config LEGACY_PTYS + bool "Legacy (BSD) PTY support" + default y + ---help--- + A pseudo terminal (PTY) is a software device consisting of two + halves: a master and a slave. The slave device behaves identical to + a physical terminal; the master device is used by a process to + read data from and write data to the slave, thereby emulating a + terminal. Typical programs for the master side are telnet servers + and xterms. + + Linux has traditionally used the BSD-like names /dev/ptyxx + for masters and /dev/ttyxx for slaves of pseudo + terminals. This scheme has a number of problems, including + security. This option enables these legacy devices; on most + systems, it is safe to say N. + + +config LEGACY_PTY_COUNT + int "Maximum number of legacy PTY in use" + depends on LEGACY_PTYS + default "256" + ---help--- + The maximum number of legacy PTYs that can be used at any one time. + The default is 256, and should be more than enough. Embedded + systems may want to reduce this to save memory. + + When not in use, each legacy PTY occupies 12 bytes on 32-bit + architectures and 24 bytes on 64-bit architectures. + +config WATCHDOG + bool "Watchdog Timer Support" + +config WATCHDOG_NOWAYOUT + bool "Disable watchdog shutdown on close" + depends on WATCHDOG + +config SOFT_WATCHDOG + tristate "Software Watchdog" + depends on WATCHDOG + +config UML_WATCHDOG + tristate "UML watchdog" + depends on WATCHDOG + +config UML_SOUND + tristate "Sound support" + help + This option enables UML sound support. If enabled, it will pull in + soundcore and the UML hostaudio relay, which acts as a intermediary + between the host's dsp and mixer devices and the UML sound system. + It is safe to say 'Y' here. + +config SOUND + tristate + default UML_SOUND + +config HOSTAUDIO + tristate + default UML_SOUND + +config UML_RANDOM + tristate "Hardware random number generator" + help + This option enables UML's "hardware" random number generator. It + attaches itself to the host's /dev/random, supplying as much entropy + as the host has, rather than the small amount the UML gets from its + own drivers. It registers itself as a standard hardware random number + generator, major 10, minor 183, and the canonical device name is + /dev/hwrng. + The way to make use of this is to install the rng-tools package + (check your distro, or download from + http://sourceforge.net/projects/gkernel/). rngd periodically reads + /dev/hwrng and injects the entropy into /dev/random. + +config MMAPPER + tristate "iomem emulation driver" + help + This driver allows a host file to be used as emulated IO memory inside + UML. + +endmenu + diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386 new file mode 100644 index 0000000..8ad156a --- /dev/null +++ b/arch/um/Kconfig.i386 @@ -0,0 +1,44 @@ +config UML_X86 + bool + default y + +config 64BIT + bool + default n + +config SEMAPHORE_SLEEPERS + bool + default y + +config TOP_ADDR + hex + default 0xc0000000 if !HOST_2G_2G + default 0x80000000 if HOST_2G_2G + +config 3_LEVEL_PGTABLES + bool "Three-level pagetables" + default n + help + Three-level pagetables will let UML have more than 4G of physical + memory. All the memory that can't be mapped directly will be treated + as high memory. + +config STUB_CODE + hex + default 0xbfffe000 + +config STUB_DATA + hex + default 0xbffff000 + +config STUB_START + hex + default STUB_CODE + +config ARCH_HAS_SC_SIGNALS + bool + default y + +config ARCH_REUSE_HOST_VSYSCALL_AREA + bool + default y diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net new file mode 100644 index 0000000..14a04eb --- /dev/null +++ b/arch/um/Kconfig.net @@ -0,0 +1,180 @@ + +menu "UML Network Devices" + depends on NET + +# UML virtual driver +config UML_NET + bool "Virtual network device" + help + While the User-Mode port cannot directly talk to any physical + hardware devices, this choice and the following transport options + provide one or more virtual network devices through which the UML + kernels can talk to each other, the host, and with the host's help, + machines on the outside world. + + For more information, including explanations of the networking and + sample configurations, see + . + + If you'd like to be able to enable networking in the User-Mode + linux environment, say Y; otherwise say N. Note that you must + enable at least one of the following transport options to actually + make use of UML networking. + +config UML_NET_ETHERTAP + bool "Ethertap transport" + depends on UML_NET + help + The Ethertap User-Mode Linux network transport allows a single + running UML to exchange packets with its host over one of the + host's Ethertap devices, such as /dev/tap0. Additional running + UMLs can use additional Ethertap devices, one per running UML. + While the UML believes it's on a (multi-device, broadcast) virtual + Ethernet network, it's in fact communicating over a point-to-point + link with the host. + + To use this, your host kernel must have support for Ethertap + devices. Also, if your host kernel is 2.4.x, it must have + CONFIG_NETLINK_DEV configured as Y or M. + + For more information, see + That site + has examples of the UML command line to use to enable Ethertap + networking. + + If you'd like to set up an IP network with the host and/or the + outside world, say Y to this, the Daemon Transport and/or the + Slip Transport. You'll need at least one of them, but may choose + more than one without conflict. If you don't need UML networking, + say N. + +config UML_NET_TUNTAP + bool "TUN/TAP transport" + depends on UML_NET + help + The UML TUN/TAP network transport allows a UML instance to exchange + packets with the host over a TUN/TAP device. This option will only + work with a 2.4 host, unless you've applied the TUN/TAP patch to + your 2.2 host kernel. + + To use this transport, your host kernel must have support for TUN/TAP + devices, either built-in or as a module. + +config UML_NET_SLIP + bool "SLIP transport" + depends on UML_NET + help + The slip User-Mode Linux network transport allows a running UML to + network with its host over a point-to-point link. Unlike Ethertap, + which can carry any Ethernet frame (and hence even non-IP packets), + the slip transport can only carry IP packets. + + To use this, your host must support slip devices. + + For more information, see + . That site + has examples of the UML command line to use to enable slip + networking, and details of a few quirks with it. + + The Ethertap Transport is preferred over slip because of its + limitations. If you prefer slip, however, say Y here. Otherwise + choose the Multicast transport (to network multiple UMLs on + multiple hosts), Ethertap (to network with the host and the + outside world), and/or the Daemon transport (to network multiple + UMLs on a single host). You may choose more than one without + conflict. If you don't need UML networking, say N. + +config UML_NET_DAEMON + bool "Daemon transport" + depends on UML_NET + help + This User-Mode Linux network transport allows one or more running + UMLs on a single host to communicate with each other, but not to + the host. + + To use this form of networking, you'll need to run the UML + networking daemon on the host. + + For more information, see + That site + has examples of the UML command line to use to enable Daemon + networking. + + If you'd like to set up a network with other UMLs on a single host, + say Y. If you need a network between UMLs on multiple physical + hosts, choose the Multicast Transport. To set up a network with + the host and/or other IP machines, say Y to the Ethertap or Slip + transports. You'll need at least one of them, but may choose + more than one without conflict. If you don't need UML networking, + say N. + +config UML_NET_MCAST + bool "Multicast transport" + depends on UML_NET + help + This Multicast User-Mode Linux network transport allows multiple + UMLs (even ones running on different host machines!) to talk to + each other over a virtual ethernet network. However, it requires + at least one UML with one of the other transports to act as a + bridge if any of them need to be able to talk to their hosts or any + other IP machines. + + To use this, your host kernel(s) must support IP Multicasting. + + For more information, see + That site + has examples of the UML command line to use to enable Multicast + networking, and notes about the security of this approach. + + If you need UMLs on multiple physical hosts to communicate as if + they shared an Ethernet network, say Y. If you need to communicate + with other IP machines, make sure you select one of the other + transports (possibly in addition to Multicast; they're not + exclusive). If you don't need to network UMLs say N to each of + the transports. + +config UML_NET_PCAP + bool "pcap transport" + depends on UML_NET && EXPERIMENTAL + help + The pcap transport makes a pcap packet stream on the host look + like an ethernet device inside UML. This is useful for making + UML act as a network monitor for the host. You must have libcap + installed in order to build the pcap transport into UML. + + For more information, see + That site + has examples of the UML command line to use to enable this option. + + If you intend to use UML as a network monitor for the host, say + Y here. Otherwise, say N. + +config UML_NET_SLIRP + bool "SLiRP transport" + depends on UML_NET + help + The SLiRP User-Mode Linux network transport allows a running UML + to network by invoking a program that can handle SLIP encapsulated + packets. This is commonly (but not limited to) the application + known as SLiRP, a program that can re-socket IP packets back onto + the host on which it is run. Only IP packets are supported, + unlike other network transports that can handle all Ethernet + frames. In general, slirp allows the UML the same IP connectivity + to the outside world that the host user is permitted, and unlike + other transports, SLiRP works without the need of root level + privleges, setuid binaries, or SLIP devices on the host. This + also means not every type of connection is possible, but most + situations can be accomodated with carefully crafted slirp + commands that can be passed along as part of the network device's + setup string. The effect of this transport on the UML is similar + that of a host behind a firewall that masquerades all network + connections passing through it (but is less secure). + + To use this you should first have slirp compiled somewhere + accessible on the host, and have read its documentation. If you + don't need UML networking, say N. + + Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" + +endmenu + diff --git a/arch/um/Kconfig.scsi b/arch/um/Kconfig.scsi new file mode 100644 index 0000000..c291c94 --- /dev/null +++ b/arch/um/Kconfig.scsi @@ -0,0 +1,58 @@ +comment "SCSI support type (disk, tape, CD-ROM)" + depends on SCSI + +config BLK_DEV_SD + tristate "SCSI disk support" + depends on SCSI + +config SD_EXTRA_DEVS + int "Maximum number of SCSI disks that can be loaded as modules" + depends on BLK_DEV_SD + default "40" + +config CHR_DEV_ST + tristate "SCSI tape support" + depends on SCSI + +config BLK_DEV_SR + tristate "SCSI CD-ROM support" + depends on SCSI + +config BLK_DEV_SR_VENDOR + bool "Enable vendor-specific extensions (for SCSI CDROM)" + depends on BLK_DEV_SR + +config SR_EXTRA_DEVS + int "Maximum number of CDROM devices that can be loaded as modules" + depends on BLK_DEV_SR + default "2" + +config CHR_DEV_SG + tristate "SCSI generic support" + depends on SCSI + +comment "Some SCSI devices (e.g. CD jukebox) support multiple LUNs" + depends on SCSI + +#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then +config SCSI_DEBUG_QUEUES + bool "Enable extra checks in new queueing code" + depends on SCSI + +#fi +config SCSI_MULTI_LUN + bool "Probe all LUNs on each SCSI device" + depends on SCSI + +config SCSI_CONSTANTS + bool "Verbose SCSI error reporting (kernel size +=12K)" + depends on SCSI + +config SCSI_LOGGING + bool "SCSI logging facility" + depends on SCSI + +config SCSI_DEBUG + tristate "SCSI debugging host simulator (EXPERIMENTAL)" + depends on SCSI + diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64 new file mode 100644 index 0000000..6e5357c --- /dev/null +++ b/arch/um/Kconfig.x86_64 @@ -0,0 +1,39 @@ +config UML_X86 + bool + default y + +config 64BIT + bool + default y + +config SEMAPHORE_SLEEPERS + bool + default y + +config TOP_ADDR + hex + default 0x80000000 + +config 3_LEVEL_PGTABLES + bool + default y + +config STUB_CODE + hex + default 0x7fbfffe000 + +config STUB_DATA + hex + default 0x7fbffff000 + +config STUB_START + hex + default STUB_CODE + +config ARCH_HAS_SC_SIGNALS + bool + default n + +config ARCH_REUSE_HOST_VSYSCALL_AREA + bool + default n diff --git a/arch/um/Kconfig_char b/arch/um/Kconfig_char deleted file mode 100644 index 62d87b7..0000000 --- a/arch/um/Kconfig_char +++ /dev/null @@ -1,214 +0,0 @@ - -menu "Character Devices" - -config STDERR_CONSOLE - bool "stderr console" - default y - help - console driver which dumps all printk messages to stderr. - -config STDIO_CONSOLE - bool - default y - -config SSL - bool "Virtual serial line" - help - The User-Mode Linux environment allows you to create virtual serial - lines on the UML that are usually made to show up on the host as - ttys or ptys. - - See for more - information and command line examples of how to use this facility. - - Unless you have a specific reason for disabling this, say Y. - -config NULL_CHAN - bool "null channel support" - help - This option enables support for attaching UML consoles and serial - lines to a device similar to /dev/null. Data written to it disappears - and there is never any data to be read. - -config PORT_CHAN - bool "port channel support" - help - This option enables support for attaching UML consoles and serial - lines to host portals. They may be accessed with 'telnet - '. Any number of consoles and serial lines may be - attached to a single portal, although what UML device you get when - you telnet to that portal will be unpredictable. - It is safe to say 'Y' here. - -config PTY_CHAN - bool "pty channel support" - help - This option enables support for attaching UML consoles and serial - lines to host pseudo-terminals. Access to both traditional - pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled - with this option. The assignment of UML devices to host devices - will be announced in the kernel message log. - It is safe to say 'Y' here. - -config TTY_CHAN - bool "tty channel support" - help - This option enables support for attaching UML consoles and serial - lines to host terminals. Access to both virtual consoles - (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and - /dev/pts/*) are controlled by this option. - It is safe to say 'Y' here. - -config XTERM_CHAN - bool "xterm channel support" - help - This option enables support for attaching UML consoles and serial - lines to xterms. Each UML device so assigned will be brought up in - its own xterm. - If you disable this option, then CONFIG_PT_PROXY will be disabled as - well, since UML's gdb currently requires an xterm. - It is safe to say 'Y' here. - -config NOCONFIG_CHAN - bool - default !(XTERM_CHAN && TTY_CHAN && PTY_CHAN && PORT_CHAN && NULL_CHAN) - -config CON_ZERO_CHAN - string "Default main console channel initialization" - default "fd:0,fd:1" - help - This is the string describing the channel to which the main console - will be attached by default. This value can be overridden from the - command line. The default value is "fd:0,fd:1", which attaches the - main console to stdin and stdout. - It is safe to leave this unchanged. - -config CON_CHAN - string "Default console channel initialization" - default "xterm" - help - This is the string describing the channel to which all consoles - except the main console will be attached by default. This value can - be overridden from the command line. The default value is "xterm", - which brings them up in xterms. - It is safe to leave this unchanged, although you may wish to change - this if you expect the UML that you build to be run in environments - which don't have X or xterm available. - -config SSL_CHAN - string "Default serial line channel initialization" - default "pty" - help - This is the string describing the channel to which the serial lines - will be attached by default. This value can be overridden from the - command line. The default value is "pty", which attaches them to - traditional pseudo-terminals. - It is safe to leave this unchanged, although you may wish to change - this if you expect the UML that you build to be run in environments - which don't have a set of /dev/pty* devices. - -config UNIX98_PTYS - bool "Unix98 PTY support" - ---help--- - A pseudo terminal (PTY) is a software device consisting of two - halves: a master and a slave. The slave device behaves identical to - a physical terminal; the master device is used by a process to - read data from and write data to the slave, thereby emulating a - terminal. Typical programs for the master side are telnet servers - and xterms. - - Linux has traditionally used the BSD-like names /dev/ptyxx for - masters and /dev/ttyxx for slaves of pseudo terminals. This scheme - has a number of problems. The GNU C library glibc 2.1 and later, - however, supports the Unix98 naming standard: in order to acquire a - pseudo terminal, a process opens /dev/ptmx; the number of the pseudo - terminal is then made available to the process and the pseudo - terminal slave can be accessed as /dev/pts/. What was - traditionally /dev/ttyp2 will then be /dev/pts/2, for example. - - All modern Linux systems use the Unix98 ptys. Say Y unless - you're on an embedded system and want to conserve memory. - -config LEGACY_PTYS - bool "Legacy (BSD) PTY support" - default y - ---help--- - A pseudo terminal (PTY) is a software device consisting of two - halves: a master and a slave. The slave device behaves identical to - a physical terminal; the master device is used by a process to - read data from and write data to the slave, thereby emulating a - terminal. Typical programs for the master side are telnet servers - and xterms. - - Linux has traditionally used the BSD-like names /dev/ptyxx - for masters and /dev/ttyxx for slaves of pseudo - terminals. This scheme has a number of problems, including - security. This option enables these legacy devices; on most - systems, it is safe to say N. - - -config LEGACY_PTY_COUNT - int "Maximum number of legacy PTY in use" - depends on LEGACY_PTYS - default "256" - ---help--- - The maximum number of legacy PTYs that can be used at any one time. - The default is 256, and should be more than enough. Embedded - systems may want to reduce this to save memory. - - When not in use, each legacy PTY occupies 12 bytes on 32-bit - architectures and 24 bytes on 64-bit architectures. - -config WATCHDOG - bool "Watchdog Timer Support" - -config WATCHDOG_NOWAYOUT - bool "Disable watchdog shutdown on close" - depends on WATCHDOG - -config SOFT_WATCHDOG - tristate "Software Watchdog" - depends on WATCHDOG - -config UML_WATCHDOG - tristate "UML watchdog" - depends on WATCHDOG - -config UML_SOUND - tristate "Sound support" - help - This option enables UML sound support. If enabled, it will pull in - soundcore and the UML hostaudio relay, which acts as a intermediary - between the host's dsp and mixer devices and the UML sound system. - It is safe to say 'Y' here. - -config SOUND - tristate - default UML_SOUND - -config HOSTAUDIO - tristate - default UML_SOUND - -config UML_RANDOM - tristate "Hardware random number generator" - help - This option enables UML's "hardware" random number generator. It - attaches itself to the host's /dev/random, supplying as much entropy - as the host has, rather than the small amount the UML gets from its - own drivers. It registers itself as a standard hardware random number - generator, major 10, minor 183, and the canonical device name is - /dev/hwrng. - The way to make use of this is to install the rng-tools package - (check your distro, or download from - http://sourceforge.net/projects/gkernel/). rngd periodically reads - /dev/hwrng and injects the entropy into /dev/random. - -config MMAPPER - tristate "iomem emulation driver" - help - This driver allows a host file to be used as emulated IO memory inside - UML. - -endmenu - diff --git a/arch/um/Kconfig_i386 b/arch/um/Kconfig_i386 deleted file mode 100644 index 8ad156a..0000000 --- a/arch/um/Kconfig_i386 +++ /dev/null @@ -1,44 +0,0 @@ -config UML_X86 - bool - default y - -config 64BIT - bool - default n - -config SEMAPHORE_SLEEPERS - bool - default y - -config TOP_ADDR - hex - default 0xc0000000 if !HOST_2G_2G - default 0x80000000 if HOST_2G_2G - -config 3_LEVEL_PGTABLES - bool "Three-level pagetables" - default n - help - Three-level pagetables will let UML have more than 4G of physical - memory. All the memory that can't be mapped directly will be treated - as high memory. - -config STUB_CODE - hex - default 0xbfffe000 - -config STUB_DATA - hex - default 0xbffff000 - -config STUB_START - hex - default STUB_CODE - -config ARCH_HAS_SC_SIGNALS - bool - default y - -config ARCH_REUSE_HOST_VSYSCALL_AREA - bool - default y diff --git a/arch/um/Kconfig_net b/arch/um/Kconfig_net deleted file mode 100644 index fa2ab2d..0000000 --- a/arch/um/Kconfig_net +++ /dev/null @@ -1,180 +0,0 @@ - -menu "UML Network Devices" - depends on NET - -# UML virtual driver -config UML_NET - bool "Virtual network device" - help - While the User-Mode port cannot directly talk to any physical - hardware devices, this choice and the following transport options - provide one or more virtual network devices through which the UML - kernels can talk to each other, the host, and with the host's help, - machines on the outside world. - - For more information, including explanations of the networking and - sample configurations, see - . - - If you'd like to be able to enable networking in the User-Mode - linux environment, say Y; otherwise say N. Note that you must - enable at least one of the following transport options to actually - make use of UML networking. - -config UML_NET_ETHERTAP - bool "Ethertap transport" - depends on UML_NET - help - The Ethertap User-Mode Linux network transport allows a single - running UML to exchange packets with its host over one of the - host's Ethertap devices, such as /dev/tap0. Additional running - UMLs can use additional Ethertap devices, one per running UML. - While the UML believes it's on a (multi-device, broadcast) virtual - Ethernet network, it's in fact communicating over a point-to-point - link with the host. - - To use this, your host kernel must have support for Ethertap - devices. Also, if your host kernel is 2.4.x, it must have - CONFIG_NETLINK_DEV configured as Y or M. - - For more information, see - That site - has examples of the UML command line to use to enable Ethertap - networking. - - If you'd like to set up an IP network with the host and/or the - outside world, say Y to this, the Daemon Transport and/or the - Slip Transport. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. - -config UML_NET_TUNTAP - bool "TUN/TAP transport" - depends on UML_NET - help - The UML TUN/TAP network transport allows a UML instance to exchange - packets with the host over a TUN/TAP device. This option will only - work with a 2.4 host, unless you've applied the TUN/TAP patch to - your 2.2 host kernel. - - To use this transport, your host kernel must have support for TUN/TAP - devices, either built-in or as a module. - -config UML_NET_SLIP - bool "SLIP transport" - depends on UML_NET - help - The slip User-Mode Linux network transport allows a running UML to - network with its host over a point-to-point link. Unlike Ethertap, - which can carry any Ethernet frame (and hence even non-IP packets), - the slip transport can only carry IP packets. - - To use this, your host must support slip devices. - - For more information, see - . That site - has examples of the UML command line to use to enable slip - networking, and details of a few quirks with it. - - The Ethertap Transport is preferred over slip because of its - limitations. If you prefer slip, however, say Y here. Otherwise - choose the Multicast transport (to network multiple UMLs on - multiple hosts), Ethertap (to network with the host and the - outside world), and/or the Daemon transport (to network multiple - UMLs on a single host). You may choose more than one without - conflict. If you don't need UML networking, say N. - -config UML_NET_DAEMON - bool "Daemon transport" - depends on UML_NET - help - This User-Mode Linux network transport allows one or more running - UMLs on a single host to communicate with each other, but not to - the host. - - To use this form of networking, you'll need to run the UML - networking daemon on the host. - - For more information, see - That site - has examples of the UML command line to use to enable Daemon - networking. - - If you'd like to set up a network with other UMLs on a single host, - say Y. If you need a network between UMLs on multiple physical - hosts, choose the Multicast Transport. To set up a network with - the host and/or other IP machines, say Y to the Ethertap or Slip - transports. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. - -config UML_NET_MCAST - bool "Multicast transport" - depends on UML_NET - help - This Multicast User-Mode Linux network transport allows multiple - UMLs (even ones running on different host machines!) to talk to - each other over a virtual ethernet network. However, it requires - at least one UML with one of the other transports to act as a - bridge if any of them need to be able to talk to their hosts or any - other IP machines. - - To use this, your host kernel(s) must support IP Multicasting. - - For more information, see - That site - has examples of the UML command line to use to enable Multicast - networking, and notes about the security of this approach. - - If you need UMLs on multiple physical hosts to communicate as if - they shared an Ethernet network, say Y. If you need to communicate - with other IP machines, make sure you select one of the other - transports (possibly in addition to Multicast; they're not - exclusive). If you don't need to network UMLs say N to each of - the transports. - -config UML_NET_PCAP - bool "pcap transport" - depends on UML_NET && EXPERIMENTAL - help - The pcap transport makes a pcap packet stream on the host look - like an ethernet device inside UML. This is useful for making - UML act as a network monitor for the host. You must have libcap - installed in order to build the pcap transport into UML. - - For more information, see - That site - has examples of the UML command line to use to enable this option. - - If you intend to use UML as a network monitor for the host, say - Y here. Otherwise, say N. - -config UML_NET_SLIRP - bool "SLiRP transport" - depends on UML_NET - help - The SLiRP User-Mode Linux network transport allows a running UML - to network by invoking a program that can handle SLIP encapsulated - packets. This is commonly (but not limited to) the application - known as SLiRP, a program that can re-socket IP packets back onto - the host on which it is run. Only IP packets are supported, - unlike other network transports that can handle all Ethernet - frames. In general, slirp allows the UML the same IP connectivity - to the outside world that the host user is permitted, and unlike - other transports, SLiRP works without the need of root level - privleges, setuid binaries, or SLIP devices on the host. This - also means not every type of connection is possible, but most - situations can be accomodated with carefully crafted slirp - commands that can be passed along as part of the network device's - setup string. The effect of this transport on the UML is similar - that of a host behind a firewall that masquerades all network - connections passing through it (but is less secure). - - To use this you should first have slirp compiled somewhere - accessible on the host, and have read its documentation. If you - don't need UML networking, say N. - - Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" - -endmenu - diff --git a/arch/um/Kconfig_scsi b/arch/um/Kconfig_scsi deleted file mode 100644 index c291c94..0000000 --- a/arch/um/Kconfig_scsi +++ /dev/null @@ -1,58 +0,0 @@ -comment "SCSI support type (disk, tape, CD-ROM)" - depends on SCSI - -config BLK_DEV_SD - tristate "SCSI disk support" - depends on SCSI - -config SD_EXTRA_DEVS - int "Maximum number of SCSI disks that can be loaded as modules" - depends on BLK_DEV_SD - default "40" - -config CHR_DEV_ST - tristate "SCSI tape support" - depends on SCSI - -config BLK_DEV_SR - tristate "SCSI CD-ROM support" - depends on SCSI - -config BLK_DEV_SR_VENDOR - bool "Enable vendor-specific extensions (for SCSI CDROM)" - depends on BLK_DEV_SR - -config SR_EXTRA_DEVS - int "Maximum number of CDROM devices that can be loaded as modules" - depends on BLK_DEV_SR - default "2" - -config CHR_DEV_SG - tristate "SCSI generic support" - depends on SCSI - -comment "Some SCSI devices (e.g. CD jukebox) support multiple LUNs" - depends on SCSI - -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -config SCSI_DEBUG_QUEUES - bool "Enable extra checks in new queueing code" - depends on SCSI - -#fi -config SCSI_MULTI_LUN - bool "Probe all LUNs on each SCSI device" - depends on SCSI - -config SCSI_CONSTANTS - bool "Verbose SCSI error reporting (kernel size +=12K)" - depends on SCSI - -config SCSI_LOGGING - bool "SCSI logging facility" - depends on SCSI - -config SCSI_DEBUG - tristate "SCSI debugging host simulator (EXPERIMENTAL)" - depends on SCSI - diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64 deleted file mode 100644 index 6e5357c..0000000 --- a/arch/um/Kconfig_x86_64 +++ /dev/null @@ -1,39 +0,0 @@ -config UML_X86 - bool - default y - -config 64BIT - bool - default y - -config SEMAPHORE_SLEEPERS - bool - default y - -config TOP_ADDR - hex - default 0x80000000 - -config 3_LEVEL_PGTABLES - bool - default y - -config STUB_CODE - hex - default 0x7fbfffe000 - -config STUB_DATA - hex - default 0x7fbffff000 - -config STUB_START - hex - default STUB_CODE - -config ARCH_HAS_SC_SIGNALS - bool - default n - -config ARCH_REUSE_HOST_VSYSCALL_AREA - bool - default n diff --git a/arch/um/Makefile b/arch/um/Makefile index f5a83a7..15ec6b8 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -101,10 +101,10 @@ define archhelp endef ifneq ($(KBUILD_SRC),) -$(shell mkdir -p $(ARCH_DIR) && ln -fsn $(srctree)/$(ARCH_DIR)/Kconfig_$(SUBARCH) $(ARCH_DIR)/Kconfig_arch) -CLEAN_FILES += $(ARCH_DIR)/Kconfig_arch +$(shell mkdir -p $(ARCH_DIR) && ln -fsn $(srctree)/$(ARCH_DIR)/Kconfig.$(SUBARCH) $(ARCH_DIR)/Kconfig.arch) +CLEAN_FILES += $(ARCH_DIR)/Kconfig.arch else -$(shell cd $(ARCH_DIR) && ln -sf Kconfig_$(SUBARCH) Kconfig_arch) +$(shell cd $(ARCH_DIR) && ln -sf Kconfig.$(SUBARCH) Kconfig.arch) endif prepare: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) @@ -147,7 +147,7 @@ CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/include/uml-config.h \ MRPROPER_FILES += $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) \ $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) $(ARCH_DIR)/os \ - $(ARCH_DIR)/Kconfig_arch + $(ARCH_DIR)/Kconfig.arch archclean: $(Q)$(MAKE) $(clean)=$(ARCH_DIR)/util -- cgit v1.1 From 0221575903ad68debea57679b5b46575bf57afb1 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 3 Sep 2005 15:57:23 -0700 Subject: [PATCH] uml: workaround GDB problems on debugging Apparently, GDB gets confused when we do an execvp() on ourselves. Since it's simply done to allocate further space for command line arguments (which we'll use to allow gathering the startup command line for guest processes through the host), allow the user to disable that to get a debuggable UML binary. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig.debug | 11 +++++++++++ arch/um/kernel/main.c | 2 +- arch/um/kernel/um_arch.c | 6 +++--- 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/um') diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug index bd41e42..5681a8b 100644 --- a/arch/um/Kconfig.debug +++ b/arch/um/Kconfig.debug @@ -2,6 +2,17 @@ menu "Kernel hacking" source "lib/Kconfig.debug" +config CMDLINE_ON_HOST + bool "Show command line arguments on the host in TT mode" + depends on MODE_TT + default !DEBUG_INFO + help + This controls whether arguments in guest processes should be shown on + the host's ps output. + Enabling this option hinders debugging on some recent GDB versions + (because GDB gets "confused" when we do an execvp()). So probably you + should disable it. + config PT_PROXY bool "Enable ptrace proxy" depends on XTERM_CHAN && DEBUG_INFO && MODE_TT diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c index 1e1a87f..d31027f 100644 --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -97,7 +97,7 @@ int main(int argc, char **argv, char **envp) exit(1); } -#ifdef UML_CONFIG_MODE_TT +#ifdef UML_CONFIG_CMDLINE_ON_HOST /* Allocate memory for thread command lines */ if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index ca2bb6f..b781b6f 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -126,7 +126,7 @@ unsigned long start_vm; unsigned long end_vm; int ncpus = 1; -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST /* Pointer set in linux_main, the array itself is private to each thread, * and changed at address space creation time so this poses no concurrency * problems. @@ -141,7 +141,7 @@ long physmem_size = 32 * 1024 * 1024; void set_cmdline(char *cmd) { -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST char *umid, *ptr; if(CHOOSE_MODE(honeypot, 0)) return; @@ -385,7 +385,7 @@ int linux_main(int argc, char **argv) setup_machinename(system_utsname.machine); -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST argv1_begin = argv[1]; argv1_end = &argv[1][strlen(argv[1])]; #endif -- cgit v1.1 From ed1b58d8b53519e10a35c6a2bb49cac35f439621 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Sat, 3 Sep 2005 15:57:24 -0700 Subject: [PATCH] uml: fix SIGWINCH handler race while waiting for signals. If a SIGWINCH comes in, while winch_thread() isn't waiting in wait(), winch_thread could miss signals. It isn't very probable, that anyone will see this causing trouble, as it would need a very special timing, that a missed SIGWINCH results in a wrong window size. So, this is a minor problem. But why not fix, as it can be done so easy? Signed-off-by: Bodo Stroesser Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/chan_user.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/um') diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 5d37681..de3bce7 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -63,7 +63,7 @@ error: * * SIGWINCH can't be received synchronously, so you have to set up to receive it * as a signal. That being the case, if you are going to wait for it, it is - * convenient to sit in a pause() and wait for the signal to bounce you out of + * convenient to sit in sigsuspend() and wait for the signal to bounce you out of * it (see below for how we make sure to exit only on SIGWINCH). */ @@ -94,18 +94,19 @@ static int winch_thread(void *arg) "byte, err = %d\n", -count); /* We are not using SIG_IGN on purpose, so don't fix it as I thought to - * do! If using SIG_IGN, the pause() call below would not stop on + * do! If using SIG_IGN, the sigsuspend() call below would not stop on * SIGWINCH. */ signal(SIGWINCH, winch_handler); sigfillset(&sigs); - sigdelset(&sigs, SIGWINCH); - /* Block anything else than SIGWINCH. */ + /* Block all signals possible. */ if(sigprocmask(SIG_SETMASK, &sigs, NULL) < 0){ printk("winch_thread : sigprocmask failed, errno = %d\n", errno); exit(1); } + /* In sigsuspend(), block anything else than SIGWINCH. */ + sigdelset(&sigs, SIGWINCH); if(setsid() < 0){ printk("winch_thread : setsid failed, errno = %d\n", errno); @@ -130,7 +131,7 @@ static int winch_thread(void *arg) while(1){ /* This will be interrupted by SIGWINCH only, since other signals * are blocked.*/ - pause(); + sigsuspend(&sigs); count = os_write_file(pipe_fd, &c, sizeof(c)); if(count != sizeof(c)) -- cgit v1.1 From 3b52166cf72f0826c6d8fa0541c7d4ae39c5a146 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 3 Sep 2005 15:57:26 -0700 Subject: [PATCH] uml: fault handler micro-cleanups Avoid chomping low bits of address for functions doing it by themselves, fix whitespace, add a correctness checking. I did this for remap-file-pages protection support, it was useful on its own too. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/trap_kern.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'arch/um') diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index bef8abd..b5fc89f 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -26,6 +26,7 @@ #include "mem.h" #include "mem_kern.h" +/* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). */ int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out) { @@ -35,7 +36,6 @@ int handle_page_fault(unsigned long address, unsigned long ip, pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned long page; int err = -EFAULT; *code_out = SEGV_MAPERR; @@ -52,7 +52,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, else if(expand_stack(vma, address)) goto out; - good_area: +good_area: *code_out = SEGV_ACCERR; if(is_write && !(vma->vm_flags & VM_WRITE)) goto out; @@ -60,9 +60,8 @@ int handle_page_fault(unsigned long address, unsigned long ip, if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out; - page = address & PAGE_MASK; do { - survive: +survive: switch (handle_mm_fault(mm, vma, address, is_write)){ case VM_FAULT_MINOR: current->min_flt++; @@ -79,16 +78,16 @@ int handle_page_fault(unsigned long address, unsigned long ip, default: BUG(); } - pgd = pgd_offset(mm, page); - pud = pud_offset(pgd, page); - pmd = pmd_offset(pud, page); - pte = pte_offset_kernel(pmd, page); + pgd = pgd_offset(mm, address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); } while(!pte_present(*pte)); err = 0; *pte = pte_mkyoung(*pte); if(pte_write(*pte)) *pte = pte_mkdirty(*pte); - flush_tlb_page(vma, page); - out: + flush_tlb_page(vma, address); +out: up_read(&mm->mmap_sem); return(err); @@ -144,19 +143,18 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", address, ip); - if(err == -EACCES){ + if (err == -EACCES) { si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRERR; si.si_addr = (void *)address; current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); - } - else if(err == -ENOMEM){ + } else if (err == -ENOMEM) { printk("VM: killing process %s\n", current->comm); do_exit(SIGKILL); - } - else { + } else { + BUG_ON(err != -EFAULT); si.si_signo = SIGSEGV; si.si_addr = (void *) address; current->thread.arch.faultinfo = fi; -- cgit v1.1 From e54a5dfb960053437f464a7ce372a8acc293fdcb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Sep 2005 15:57:27 -0700 Subject: [PATCH] uml: fix signal frame copy_user The copy_user stuff in the signal frame code was broke. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/sys-i386/signal.c | 2 +- arch/um/sys-x86_64/signal.c | 41 ++++++++++++++++++++++++----------------- 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'arch/um') diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 4efc69a..16bc1992 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -122,9 +122,9 @@ int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from, int err; to_fp = to->fpstate; - from_fp = from->fpstate; sigs = to->oldmask; err = copy_from_user(to, from, sizeof(*to)); + from_fp = to->fpstate; to->oldmask = sigs; to->fpstate = to_fp; if(to_fp != NULL) diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c index 8fdaed0..fe1d065 100644 --- a/arch/um/sys-x86_64/signal.c +++ b/arch/um/sys-x86_64/signal.c @@ -104,28 +104,35 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from, int fpsize) { - struct _fpstate *to_fp, *from_fp; - unsigned long sigs; - int err; - - to_fp = to->fpstate; - from_fp = from->fpstate; - sigs = to->oldmask; - err = copy_from_user(to, from, sizeof(*to)); - to->oldmask = sigs; - return(err); + struct _fpstate *to_fp, *from_fp; + unsigned long sigs; + int err; + + to_fp = to->fpstate; + sigs = to->oldmask; + err = copy_from_user(to, from, sizeof(*to)); + from_fp = to->fpstate; + to->fpstate = to_fp; + to->oldmask = sigs; + if(to_fp != NULL) + err |= copy_from_user(to_fp, from_fp, fpsize); + return(err); } int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp, struct sigcontext *from, int fpsize) { - struct _fpstate *to_fp, *from_fp; - int err; - - to_fp = (fp ? fp : (struct _fpstate *) (to + 1)); - from_fp = from->fpstate; - err = copy_to_user(to, from, sizeof(*to)); - return(err); + struct _fpstate *to_fp, *from_fp; + int err; + + to_fp = (fp ? fp : (struct _fpstate *) (to + 1)); + from_fp = from->fpstate; + err = copy_to_user(to, from, sizeof(*to)); + if(from_fp != NULL){ + err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate)); + err |= copy_to_user(to_fp, from_fp, fpsize); + } + return(err); } #endif -- cgit v1.1 From 7efd08c85523f9468a6a8748d6f02b3e73967569 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Sep 2005 15:57:28 -0700 Subject: [PATCH] uml: fix a macro typo Fix a macro typo which could break if the macro is passed arguments with side-effects. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/sysdep-x86_64/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/um') diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h index be8acd5..331aa2d 100644 --- a/arch/um/include/sysdep-x86_64/ptrace.h +++ b/arch/um/include/sysdep-x86_64/ptrace.h @@ -227,7 +227,7 @@ struct syscall_args { panic("Bad register in UPT_SET : %d\n", reg); \ break; \ } \ - val; \ + __upt_val; \ }) #define UPT_SET_SYSCALL_RETURN(r, res) \ -- cgit v1.1 From ec7cf783dd6cf5c8fb6b6767560686ed28d1cd6d Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:29 -0700 Subject: [PATCH] uml: error path cleanup This cleans up the error path in ubd_open, causing it now to call ubd_close appropriately when something fails. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/ubd_kern.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/um') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 344b24d..f731343 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -668,21 +668,22 @@ static int ubd_add(int n) struct ubd *dev = &ubd_dev[n]; int err; + err = -ENODEV; if(dev->file == NULL) - return(-ENODEV); + goto out; if (ubd_open_dev(dev)) - return(-ENODEV); + goto out; err = ubd_file_size(dev, &dev->size); if(err < 0) - return(err); + goto out_close; dev->size = ROUND_BLOCK(dev->size); err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]); if(err) - return(err); + goto out_close; if(fake_major != MAJOR_NR) ubd_new_disk(fake_major, dev->size, n, @@ -693,8 +694,11 @@ static int ubd_add(int n) if (fake_ide) make_ide_entries(ubd_gendisk[n]->disk_name); + err = 0; +out_close: ubd_close(dev); - return 0; +out: + return err; } static int ubd_config(char *str) -- cgit v1.1 From 93ea5a5b5c71ddbefd4082627d8f33e4bc038a6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Sep 2005 15:57:30 -0700 Subject: [PATCH] uml: build cleanup Build cleanups Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Makefile | 1 + arch/um/sys-i386/Makefile | 2 -- arch/um/sys-x86_64/Makefile | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/um') diff --git a/arch/um/Makefile b/arch/um/Makefile index 15ec6b8..b15f604 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -56,6 +56,7 @@ SYS_DIR := $(ARCH_DIR)/include/sysdep-$(SUBARCH) CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap +AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS := $(patsubst -I%,,$(CFLAGS)) USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \ diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 77c3c4d..cdf6b6d 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -22,8 +22,6 @@ STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) # why ask why? $(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) -$(obj)/stub.o : a_flags = $(STUB_CFLAGS) - subdir- := util include arch/um/scripts/Makefile.unmap diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 736d3ed..32bb7d8 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -33,8 +33,6 @@ STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) # why ask why? $(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) -$(obj)/stub.o : a_flags = $(STUB_CFLAGS) - subdir- := util include arch/um/scripts/Makefile.unmap -- cgit v1.1 From 13abf8130139c2ccd4962a7e5a8902be5e6cb5a7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Sep 2005 15:57:31 -0700 Subject: [PATCH] uml: remove libc reference in build Remove an unneeded reference to libc. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/scripts/Makefile.unmap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/um') diff --git a/arch/um/scripts/Makefile.unmap b/arch/um/scripts/Makefile.unmap index 802d027..b216518 100644 --- a/arch/um/scripts/Makefile.unmap +++ b/arch/um/scripts/Makefile.unmap @@ -12,7 +12,7 @@ $(obj)/unmap.o: _c_flags = $(call unprofile,$(CFLAGS)) quiet_cmd_wrapld = LD $@ define cmd_wrapld - $(LD) $(LDFLAGS) -r -o $(obj)/unmap_tmp.o $< $(shell $(CC) $(CFLAGS) -print-file-name=libc.a); \ + $(LD) $(LDFLAGS) -r -o $(obj)/unmap_tmp.o $< ; \ $(OBJCOPY) $(UML_OBJCOPYFLAGS) $(obj)/unmap_tmp.o $@ -G switcheroo endef -- cgit v1.1 From 02edeb586ae4cdd17778923674700edb732a4741 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:33 -0700 Subject: [PATCH] uml: mark SMP on UML/x86_64 as broken Noticed by Al Viro - SMP on x86_64 is fundamentally broken due to UML's reuse of the host arch's percpu stuff. This is OK on x86, but the x86_64 pda stuff just won't work for UML. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig | 2 +- arch/um/Kconfig.x86_64 | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/um') diff --git a/arch/um/Kconfig b/arch/um/Kconfig index d5e22f0..684e1f8 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -196,7 +196,7 @@ config HOST_2G_2G config SMP bool "Symmetric multi-processing support (EXPERIMENTAL)" default n - depends on MODE_TT && EXPERIMENTAL + depends on (MODE_TT && EXPERIMENTAL && !SMP_BROKEN) || (BROKEN && SMP_BROKEN) help This option enables UML SMP support. It is NOT related to having a real SMP box. Not directly, at least. diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64 index 6e5357c..bd35e59 100644 --- a/arch/um/Kconfig.x86_64 +++ b/arch/um/Kconfig.x86_64 @@ -37,3 +37,7 @@ config ARCH_HAS_SC_SIGNALS config ARCH_REUSE_HOST_VSYSCALL_AREA bool default n + +config SMP_BROKEN + bool + default y -- cgit v1.1 From 77fa5adcda6d686d2f45a2b55dcb9a03e7d33fa1 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:34 -0700 Subject: [PATCH] uml: remove duplicated exports Al Viro spotted a bunch of duplicated exports - this removes them. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/ksyms.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch/um') diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 99439fa..32d3076 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -114,22 +114,3 @@ extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__read_lock_failed); #endif - -#ifdef CONFIG_HIGHMEM -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.1 From c56004901fa5dcf55f92318f192ab3c0e87db2d1 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:36 -0700 Subject: [PATCH] uml: TLB operation batching This adds VM op batching to skas0. Rather than having a context switch to and from the userspace stub for each address space change, we write a number of operations to the stub data page and invoke a different stub which loops over them and executes them all in one go. The operations are stored as [ system call number, arg1, arg2, ... ] tuples. The set is terminated by a system call number of 0. Single operations, i.e. page faults, are handled in the old way, since that is slightly more efficient. For a kernel build, a minority (~1/4) of the operations are part of a set. These sets averaged ~100 in length, so for this quarter, the context switching overhead is greatly reduced. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/tlb.h | 22 +--- arch/um/kernel/skas/include/skas.h | 13 ++- arch/um/kernel/skas/mem_user.c | 112 ++++++++++++++---- arch/um/kernel/skas/tlb.c | 23 ++-- arch/um/kernel/tlb.c | 226 +++++++++++++++++++------------------ arch/um/kernel/tt/tlb.c | 5 +- arch/um/sys-i386/stub.S | 17 +++ arch/um/sys-x86_64/stub.S | 21 ++++ 8 files changed, 273 insertions(+), 166 deletions(-) (limited to 'arch/um') diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h index c6f9628..2deefb9 100644 --- a/arch/um/include/tlb.h +++ b/arch/um/include/tlb.h @@ -9,7 +9,7 @@ #include "um_mmu.h" struct host_vm_op { - enum { MMAP, MUNMAP, MPROTECT } type; + enum { NONE, MMAP, MUNMAP, MPROTECT } type; union { struct { unsigned long addr; @@ -38,24 +38,10 @@ extern void mprotect_kernel_vm(int w); extern void force_flush_all(void); extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, - void (*do_ops)(union mm_context *, - struct host_vm_op *, int)); + void *(*do_ops)(union mm_context *, + struct host_vm_op *, int, int, + void *)); extern int flush_tlb_kernel_range_common(unsigned long start, unsigned long end); -extern int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)); -extern int add_munmap(unsigned long addr, unsigned long len, - struct host_vm_op *ops, int index, int last_filled, - union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)); -extern int add_mprotect(unsigned long addr, unsigned long len, int r, int w, - int x, struct host_vm_op *ops, int index, - int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)); #endif diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index d983ea8..e91064b 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -24,11 +24,14 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, - int r, int w, int x, int phys_fd, unsigned long long offset); -extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); -extern int protect(struct mm_id * mm_idp, unsigned long addr, - unsigned long len, int r, int w, int x); +extern void *map(struct mm_id * mm_idp, unsigned long virt, + unsigned long len, int r, int w, int x, int phys_fd, + unsigned long long offset, int done, void *data); +extern void *unmap(struct mm_id * mm_idp, void *addr, + unsigned long len, int done, void *data); +extern void *protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x, int done, + void *data); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); extern int start_userspace(unsigned long stub_stack); diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index b0980ff..c976320 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -25,12 +25,14 @@ #include "sysdep/stub.h" #include "skas.h" -extern unsigned long syscall_stub, __syscall_stub_start; +extern unsigned long syscall_stub, batch_syscall_stub, __syscall_stub_start; extern void wait_stub_done(int pid, int sig, char * fname); -static long run_syscall_stub(struct mm_id * mm_idp, int syscall, - unsigned long *args) +int single_count = 0; + +static long one_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args) { int n, pid = mm_idp->u.pid; unsigned long regs[MAX_REG_NR]; @@ -49,18 +51,80 @@ static long run_syscall_stub(struct mm_id * mm_idp, int syscall, regs[REGS_SYSCALL_ARG6] = args[5]; n = ptrace_setregs(pid, regs); if(n < 0){ - printk("run_syscall_stub : PTRACE_SETREGS failed, " + printk("one_syscall_stub : PTRACE_SETREGS failed, " + "errno = %d\n", n); + return(n); + } + + wait_stub_done(pid, 0, "one_syscall_stub"); + + return(*((unsigned long *) mm_idp->stack)); +} + +int multi_count = 0; +int multi_op_count = 0; + +static long many_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, int done, void **addr_out) +{ + unsigned long regs[MAX_REG_NR], *stack; + int n, pid = mm_idp->u.pid; + + stack = *addr_out; + if(stack == NULL) + stack = (unsigned long *) current_stub_stack(); + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack = 0; + multi_op_count++; + + if(!done && ((((unsigned long) stack) & ~PAGE_MASK) < + PAGE_SIZE - 8 * sizeof(long))){ + *addr_out = stack; + return 0; + } + + multi_count++; + get_safe_registers(regs); + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + ((unsigned long) &batch_syscall_stub - + (unsigned long) &__syscall_stub_start); + regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA; + + n = ptrace_setregs(pid, regs); + if(n < 0){ + printk("many_syscall_stub : PTRACE_SETREGS failed, " "errno = %d\n", n); return(n); } - wait_stub_done(pid, 0, "run_syscall_stub"); + wait_stub_done(pid, 0, "many_syscall_stub"); + stack = (unsigned long *) mm_idp->stack; - return(*((unsigned long *) mm_idp->stack)); + *addr_out = stack; + return(*stack); } -int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, - int r, int w, int x, int phys_fd, unsigned long long offset) +static long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, void **addr, int done) +{ + long res; + + if((*addr == NULL) && done) + res = one_syscall_stub(mm_idp, syscall, args); + else res = many_syscall_stub(mm_idp, syscall, args, done, addr); + + return res; +} + +void *map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset, + int done, void *data) { int prot, n; @@ -70,6 +134,7 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, if(proc_mm){ struct proc_mm_op map; int fd = mm_idp->u.mm_fd; + map = ((struct proc_mm_op) { .op = MM_MMAP, .u = { .mmap = @@ -91,21 +156,24 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, MAP_SHARED | MAP_FIXED, phys_fd, MMAP_OFFSET(offset) }; - res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); + res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, + &data, done); if((void *) res == MAP_FAILED) printk("mmap stub failed, errno = %d\n", res); } - return 0; + return data; } -int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) +void *unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done, + void *data) { int n; if(proc_mm){ struct proc_mm_op unmap; int fd = mm_idp->u.mm_fd; + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, .u = { .munmap = @@ -113,28 +181,25 @@ int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) (unsigned long) addr, .len = len } } } ); n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); - } + if(n != sizeof(unmap)) + printk("unmap - proc_mm write returned %d\n", n); } else { int res; unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_munmap, args); + res = run_syscall_stub(mm_idp, __NR_munmap, args, + &data, done); if(res < 0) printk("munmap stub failed, errno = %d\n", res); } - return(0); + return data; } -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, - int r, int w, int x) +void *protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x, int done, void *data) { struct proc_mm_op protect; int prot, n; @@ -160,12 +225,13 @@ int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, int res; unsigned long args[] = { addr, len, prot, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_mprotect, args); + res = run_syscall_stub(mm_idp, __NR_mprotect, args, + &data, done); if(res < 0) panic("mprotect stub failed, errno = %d\n", res); } - return(0); + return data; } void before_mem_skas(unsigned long unused) diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index 6230999..4b5fd20 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -18,7 +18,8 @@ #include "os.h" #include "tlb.h" -static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) +static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void *flush) { struct host_vm_op *op; int i; @@ -27,24 +28,28 @@ static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) op = &ops[i]; switch(op->type){ case MMAP: - map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, - op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, - op->u.mmap.fd, op->u.mmap.offset); + flush = map(&mmu->skas.id, op->u.mmap.addr, + op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, + op->u.mmap.x, op->u.mmap.fd, + op->u.mmap.offset, finished, flush); break; case MUNMAP: - unmap(&mmu->skas.id, (void *) op->u.munmap.addr, - op->u.munmap.len); + flush = unmap(&mmu->skas.id, (void *) op->u.munmap.addr, + op->u.munmap.len, finished, flush); break; case MPROTECT: - protect(&mmu->skas.id, op->u.mprotect.addr, - op->u.mprotect.len, op->u.mprotect.r, - op->u.mprotect.w, op->u.mprotect.x); + flush = protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x, + finished, flush); break; default: printk("Unknown op type %d in do_ops\n", op->type); break; } } + + return flush; } extern int proc_mm; diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 83ec8d47..7d914bb 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -15,12 +15,116 @@ #include "mem_user.h" #include "os.h" +static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, void **flush, + void *(*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void *)) +{ + __u64 offset; + struct host_vm_op *last; + int fd; + + fd = phys_mapping(phys, &offset); + if(index != -1){ + last = &ops[index]; + if((last->type == MMAP) && + (last->u.mmap.addr + last->u.mmap.len == virt) && + (last->u.mmap.r == r) && (last->u.mmap.w == w) && + (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && + (last->u.mmap.offset + last->u.mmap.len == offset)){ + last->u.mmap.len += len; + return index; + } + } + + if(index == last_filled){ + *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); + index = -1; + } + + ops[++index] = ((struct host_vm_op) { .type = MMAP, + .u = { .mmap = { + .addr = virt, + .len = len, + .r = r, + .w = w, + .x = x, + .fd = fd, + .offset = offset } + } }); + return index; +} + +static int add_munmap(unsigned long addr, unsigned long len, + struct host_vm_op *ops, int index, int last_filled, + union mm_context *mmu, void **flush, + void *(*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void *)) +{ + struct host_vm_op *last; + + if(index != -1){ + last = &ops[index]; + if((last->type == MUNMAP) && + (last->u.munmap.addr + last->u.mmap.len == addr)){ + last->u.munmap.len += len; + return index; + } + } + + if(index == last_filled){ + *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); + index = -1; + } + + ops[++index] = ((struct host_vm_op) { .type = MUNMAP, + .u = { .munmap = { + .addr = addr, + .len = len } } }); + return index; +} + +static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, + int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, void **flush, + void *(*do_ops)(union mm_context *, + struct host_vm_op *, int, int, void *)) +{ + struct host_vm_op *last; + + if(index != -1){ + last = &ops[index]; + if((last->type == MPROTECT) && + (last->u.mprotect.addr + last->u.mprotect.len == addr) && + (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && + (last->u.mprotect.x == x)){ + last->u.mprotect.len += len; + return index; + } + } + + if(index == last_filled){ + *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); + index = -1; + } + + ops[++index] = ((struct host_vm_op) { .type = MPROTECT, + .u = { .mprotect = { + .addr = addr, + .len = len, + .r = r, + .w = w, + .x = x } } }); + return index; +} + #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)) + void *(*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void *)) { pgd_t *npgd; pud_t *npud; @@ -29,11 +133,13 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; - struct host_vm_op ops[16]; + struct host_vm_op ops[1]; + void *flush = NULL; int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; if(mm == NULL) return; + ops[0].type = NONE; for(addr = start_addr; addr < end_addr;){ npgd = pgd_offset(mm, addr); if(!pgd_present(*npgd)){ @@ -43,7 +149,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(force || pgd_newpage(*npgd)){ op_index = add_munmap(addr, end - addr, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); pgd_mkuptodate(*npgd); } addr = end; @@ -58,7 +164,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(force || pud_newpage(*npud)){ op_index = add_munmap(addr, end - addr, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); pud_mkuptodate(*npud); } addr = end; @@ -73,7 +179,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(force || pmd_newpage(*npmd)){ op_index = add_munmap(addr, end - addr, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); pmd_mkuptodate(*npmd); } addr = end; @@ -96,20 +202,20 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, pte_val(*npte) & PAGE_MASK, PAGE_SIZE, r, w, x, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); else op_index = add_munmap(addr, PAGE_SIZE, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); } else if(pte_newprot(*npte)) op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(mmu, ops, op_index); + flush = (*do_ops)(mmu, ops, op_index, 1, flush); } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -226,106 +332,6 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) return(pte_offset_map(pmd, addr)); } -int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - __u64 offset; - struct host_vm_op *last; - int fd; - - fd = phys_mapping(phys, &offset); - if(index != -1){ - last = &ops[index]; - if((last->type == MMAP) && - (last->u.mmap.addr + last->u.mmap.len == virt) && - (last->u.mmap.r == r) && (last->u.mmap.w == w) && - (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && - (last->u.mmap.offset + last->u.mmap.len == offset)){ - last->u.mmap.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MMAP, - .u = { .mmap = { - .addr = virt, - .len = len, - .r = r, - .w = w, - .x = x, - .fd = fd, - .offset = offset } - } }); - return(index); -} - -int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - struct host_vm_op *last; - - if(index != -1){ - last = &ops[index]; - if((last->type == MUNMAP) && - (last->u.munmap.addr + last->u.mmap.len == addr)){ - last->u.munmap.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MUNMAP, - .u = { .munmap = { - .addr = addr, - .len = len } } }); - return(index); -} - -int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, - union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - struct host_vm_op *last; - - if(index != -1){ - last = &ops[index]; - if((last->type == MPROTECT) && - (last->u.mprotect.addr + last->u.mprotect.len == addr) && - (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && - (last->u.mprotect.x == x)){ - last->u.mprotect.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MPROTECT, - .u = { .mprotect = { - .addr = addr, - .len = len, - .r = r, - .w = w, - .x = x } } }); - return(index); -} - void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) { address &= PAGE_MASK; diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 2eefb43..16fc6a2 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,7 +17,8 @@ #include "os.h" #include "tlb.h" -static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) +static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void *flush) { struct host_vm_op *op; int i; @@ -45,6 +46,8 @@ static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) break; } } + + return NULL; } static void fix_range(struct mm_struct *mm, unsigned long start_addr, diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S index 2f2c70a..a0f9506 100644 --- a/arch/um/sys-i386/stub.S +++ b/arch/um/sys-i386/stub.S @@ -6,3 +6,20 @@ syscall_stub: int $0x80 mov %eax, UML_CONFIG_STUB_DATA int3 + + .globl batch_syscall_stub +batch_syscall_stub: + mov $UML_CONFIG_STUB_DATA, %esp +again: pop %eax + cmpl $0, %eax + jz done + pop %ebx + pop %ecx + pop %edx + pop %esi + pop %edi + pop %ebp + int $0x80 + mov %eax, UML_CONFIG_STUB_DATA + jmp again +done: int3 diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S index 31c1492..957f2ef 100644 --- a/arch/um/sys-x86_64/stub.S +++ b/arch/um/sys-x86_64/stub.S @@ -13,3 +13,24 @@ syscall_stub: or %rcx, %rbx movq %rax, (%rbx) int3 + + .globl batch_syscall_stub +batch_syscall_stub: + movq $(UML_CONFIG_STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rbx, %rsp +again: pop %rax + cmpq $0, %rax +jz done + pop %rdi + pop %rsi + pop %rdx + pop %r10 + pop %r8 + pop %r9 + syscall + mov %rax, (%rbx) + jmp again +done: int3 -- cgit v1.1 From e32dacb9f481fd6decb41adb28e720c923d34f54 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:42 -0700 Subject: [PATCH] uml: system call path cleanup This merges two sets of files which had no business being split apart in the first place. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/syscall.h | 12 ++++++++ arch/um/include/syscall_user.h | 23 --------------- arch/um/include/sysdep-i386/syscalls.h | 2 ++ arch/um/include/sysdep-x86_64/syscalls.h | 2 ++ arch/um/kernel/Makefile | 2 +- arch/um/kernel/skas/Makefile | 2 +- arch/um/kernel/skas/syscall.c | 50 ++++++++++++++++++++++++++++++++ arch/um/kernel/skas/syscall_kern.c | 43 --------------------------- arch/um/kernel/skas/syscall_user.c | 44 ---------------------------- arch/um/kernel/syscall.c | 36 +++++++++++++++++++++++ arch/um/kernel/syscall_user.c | 48 ------------------------------ arch/um/kernel/tt/syscall_kern.c | 47 ++++++++++++++++-------------- arch/um/kernel/tt/syscall_user.c | 35 +--------------------- 13 files changed, 131 insertions(+), 215 deletions(-) create mode 100644 arch/um/include/syscall.h delete mode 100644 arch/um/include/syscall_user.h create mode 100644 arch/um/kernel/skas/syscall.c delete mode 100644 arch/um/kernel/skas/syscall_kern.c delete mode 100644 arch/um/kernel/skas/syscall_user.c create mode 100644 arch/um/kernel/syscall.c delete mode 100644 arch/um/kernel/syscall_user.c (limited to 'arch/um') diff --git a/arch/um/include/syscall.h b/arch/um/include/syscall.h new file mode 100644 index 0000000..dda1df9 --- /dev/null +++ b/arch/um/include/syscall.h @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSCALL_USER_H +#define __SYSCALL_USER_H + +extern int record_syscall_start(int syscall); +extern void record_syscall_end(int index, long result); + +#endif diff --git a/arch/um/include/syscall_user.h b/arch/um/include/syscall_user.h deleted file mode 100644 index 811d0ec..0000000 --- a/arch/um/include/syscall_user.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#ifndef __SYSCALL_USER_H -#define __SYSCALL_USER_H - -extern int record_syscall_start(int syscall); -extern void record_syscall_end(int index, long result); - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/sysdep-i386/syscalls.h b/arch/um/include/sysdep-i386/syscalls.h index be0a3e3..a0d5b74 100644 --- a/arch/um/include/sysdep-i386/syscalls.h +++ b/arch/um/include/sysdep-i386/syscalls.h @@ -16,6 +16,8 @@ extern syscall_handler_t sys_rt_sigaction; extern syscall_handler_t old_mmap_i386; +extern syscall_handler_t *sys_call_table[]; + #define EXECUTE_SYSCALL(syscall, regs) \ ((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) diff --git a/arch/um/include/sysdep-x86_64/syscalls.h b/arch/um/include/sysdep-x86_64/syscalls.h index 67923cc..e06f83e 100644 --- a/arch/um/include/sysdep-x86_64/syscalls.h +++ b/arch/um/include/sysdep-x86_64/syscalls.h @@ -14,6 +14,8 @@ typedef long syscall_handler_t(void); extern syscall_handler_t *ia32_sys_call_table[]; +extern syscall_handler_t *sys_call_table[]; + #define EXECUTE_SYSCALL(syscall, regs) \ (((long (*)(long, long, long, long, long, long)) \ (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index a8918e80..a9cdd00 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_GCOV) += gmon_syms.o obj-$(CONFIG_TTY_LOG) += tty_log.o -obj-$(CONFIG_SYSCALL_DEBUG) += syscall_user.o +obj-$(CONFIG_SYSCALL_DEBUG) += syscall.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index d296d55..db36c7c 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -4,7 +4,7 @@ # obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ - syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ + syscall.o tlb.o trap_user.o uaccess.o subdir- := util diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c new file mode 100644 index 0000000..51fb940 --- /dev/null +++ b/arch/um/kernel/skas/syscall.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/sys.h" +#include "linux/ptrace.h" +#include "asm/errno.h" +#include "asm/unistd.h" +#include "asm/ptrace.h" +#include "asm/current.h" +#include "sysdep/syscalls.h" +#include "kern_util.h" +#include "syscall.h" + +void handle_syscall(union uml_pt_regs *r) +{ + struct pt_regs *regs = container_of(r, struct pt_regs, regs); + long result; + int syscall; +#ifdef UML_CONFIG_SYSCALL_DEBUG + int index; + + index = record_syscall_start(UPT_SYSCALL_NR(r)); +#endif + syscall_trace(r, 0); + + current->thread.nsyscalls++; + nsyscalls++; + + /* This should go in the declaration of syscall, but when I do that, + * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing + * children at all, sometimes hanging when bash doesn't see the first + * ls exit. + * The assembly looks functionally the same to me. This is + * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5) + * in case it's a compiler bug. + */ + syscall = UPT_SYSCALL_NR(r); + if((syscall >= NR_syscalls) || (syscall < 0)) + result = -ENOSYS; + else result = EXECUTE_SYSCALL(syscall, regs); + + REGS_SET_SYSCALL_RETURN(r->skas.regs, result); + + syscall_trace(r, 1); +#ifdef UML_CONFIG_SYSCALL_DEBUG + record_syscall_end(index, result); +#endif +} diff --git a/arch/um/kernel/skas/syscall_kern.c b/arch/um/kernel/skas/syscall_kern.c deleted file mode 100644 index bdf040c..0000000 --- a/arch/um/kernel/skas/syscall_kern.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -#include "linux/sys.h" -#include "linux/ptrace.h" -#include "asm/errno.h" -#include "asm/unistd.h" -#include "asm/ptrace.h" -#include "asm/current.h" -#include "sysdep/syscalls.h" -#include "kern_util.h" - -extern syscall_handler_t *sys_call_table[]; - -long execute_syscall_skas(void *r) -{ - struct pt_regs *regs = r; - long res; - int syscall; - - current->thread.nsyscalls++; - nsyscalls++; - syscall = UPT_SYSCALL_NR(®s->regs); - - if((syscall >= NR_syscalls) || (syscall < 0)) - res = -ENOSYS; - else res = EXECUTE_SYSCALL(syscall, regs); - - return(res); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/syscall_user.c b/arch/um/kernel/skas/syscall_user.c deleted file mode 100644 index 6b06649..0000000 --- a/arch/um/kernel/skas/syscall_user.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include "kern_util.h" -#include "uml-config.h" -#include "syscall_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "skas.h" - -void handle_syscall(union uml_pt_regs *regs) -{ - long result; -#ifdef UML_CONFIG_SYSCALL_DEBUG - int index; - - index = record_syscall_start(UPT_SYSCALL_NR(regs)); -#endif - - syscall_trace(regs, 0); - result = execute_syscall_skas(regs); - - REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); - - syscall_trace(regs, 1); -#ifdef UML_CONFIG_SYSCALL_DEBUG - record_syscall_end(index, result); -#endif -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c new file mode 100644 index 0000000..1429c13 --- /dev/null +++ b/arch/um/kernel/syscall.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "kern_util.h" +#include "syscall.h" +#include "os.h" + +struct { + int syscall; + int pid; + long result; + unsigned long long start; + unsigned long long end; +} syscall_record[1024]; + +int record_syscall_start(int syscall) +{ + int max, index; + + max = sizeof(syscall_record)/sizeof(syscall_record[0]); + index = next_syscall_index(max); + + syscall_record[index].syscall = syscall; + syscall_record[index].pid = current_pid(); + syscall_record[index].result = 0xdeadbeef; + syscall_record[index].start = os_usecs(); + return(index); +} + +void record_syscall_end(int index, long result) +{ + syscall_record[index].result = result; + syscall_record[index].end = os_usecs(); +} diff --git a/arch/um/kernel/syscall_user.c b/arch/um/kernel/syscall_user.c deleted file mode 100644 index 01b711e..0000000 --- a/arch/um/kernel/syscall_user.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include "kern_util.h" -#include "syscall_user.h" - -struct { - int syscall; - int pid; - long result; - struct timeval start; - struct timeval end; -} syscall_record[1024]; - -int record_syscall_start(int syscall) -{ - int max, index; - - max = sizeof(syscall_record)/sizeof(syscall_record[0]); - index = next_syscall_index(max); - - syscall_record[index].syscall = syscall; - syscall_record[index].pid = current_pid(); - syscall_record[index].result = 0xdeadbeef; - gettimeofday(&syscall_record[index].start, NULL); - return(index); -} - -void record_syscall_end(int index, long result) -{ - syscall_record[index].result = result; - gettimeofday(&syscall_record[index].end, NULL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c index 2650a62..3d29c90 100644 --- a/arch/um/kernel/tt/syscall_kern.c +++ b/arch/um/kernel/tt/syscall_kern.c @@ -12,36 +12,41 @@ #include "asm/uaccess.h" #include "asm/stat.h" #include "sysdep/syscalls.h" +#include "sysdep/sigcontext.h" #include "kern_util.h" +#include "syscall.h" -extern syscall_handler_t *sys_call_table[]; - -long execute_syscall_tt(void *r) +void syscall_handler_tt(int sig, struct pt_regs *regs) { - struct pt_regs *regs = r; - long res; + void *sc; + long result; int syscall; - #ifdef CONFIG_SYSCALL_DEBUG + int index; + index = record_syscall_start(syscall); +#endif + sc = UPT_SC(®s->regs); + SC_START_SYSCALL(sc); + + syscall_trace(®s->regs, 0); + current->thread.nsyscalls++; nsyscalls++; -#endif syscall = UPT_SYSCALL_NR(®s->regs); if((syscall >= NR_syscalls) || (syscall < 0)) - res = -ENOSYS; - else res = EXECUTE_SYSCALL(syscall, regs); + result = -ENOSYS; + else result = EXECUTE_SYSCALL(syscall, regs); - return(res); -} + /* regs->sc may have changed while the system call ran (there may + * have been an interrupt or segfault), so it needs to be refreshed. + */ + UPT_SC(®s->regs) = sc; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + SC_SET_SYSCALL_RETURN(sc, result); + + syscall_trace(®s->regs, 1); +#ifdef CONFIG_SYSCALL_DEBUG + record_syscall_end(index, result); +#endif +} diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c index b218316..902987b 100644 --- a/arch/um/kernel/tt/syscall_user.c +++ b/arch/um/kernel/tt/syscall_user.c @@ -13,42 +13,9 @@ #include "task.h" #include "user_util.h" #include "kern_util.h" -#include "syscall_user.h" +#include "syscall.h" #include "tt.h" - -void syscall_handler_tt(int sig, union uml_pt_regs *regs) -{ - void *sc; - long result; - int syscall; -#ifdef UML_CONFIG_DEBUG_SYSCALL - int index; -#endif - - syscall = UPT_SYSCALL_NR(regs); - sc = UPT_SC(regs); - SC_START_SYSCALL(sc); - -#ifdef UML_CONFIG_DEBUG_SYSCALL - index = record_syscall_start(syscall); -#endif - syscall_trace(regs, 0); - result = execute_syscall_tt(regs); - - /* regs->sc may have changed while the system call ran (there may - * have been an interrupt or segfault), so it needs to be refreshed. - */ - UPT_SC(regs) = sc; - - SC_SET_SYSCALL_RETURN(sc, result); - - syscall_trace(regs, 1); -#ifdef UML_CONFIG_DEBUG_SYSCALL - record_syscall_end(index, result); -#endif -} - void do_sigtrap(void *task) { UPT_SYSCALL_NR(TASK_REGS(task)) = -1; -- cgit v1.1 From 30f7dabb083f8ff4ce541b5ac4e5d70cc173051a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Sep 2005 15:57:43 -0700 Subject: [PATCH] uml: build cleanups Added missing include list to uml AFLAGS Killed magic for stubs. [So] - it was needed only because of messed AFLAGS Switched segv_stubs.c to kernel CFLAGS sans profile, instead of user ones Killed STUBS_CFLAGS - it's not needed and the only remaining use had been gratitious - it only polluted CFLAGS Signed-off-by: Al Viro Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Makefile-x86_64 | 2 +- arch/um/sys-i386/Makefile | 6 +----- arch/um/sys-i386/stub_segv.c | 3 +-- arch/um/sys-x86_64/Makefile | 6 +----- arch/um/sys-x86_64/stub_segv.c | 3 ++- 5 files changed, 6 insertions(+), 14 deletions(-) (limited to 'arch/um') diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 index aa2f717..baddb5d 100644 --- a/arch/um/Makefile-x86_64 +++ b/arch/um/Makefile-x86_64 @@ -6,7 +6,7 @@ START := 0x60000000 #We #undef __x86_64__ for kernelspace, not for userspace where #it's needed for headers to work! -CFLAGS += -U__$(SUBARCH)__ -fno-builtin $(STUB_CFLAGS) +CFLAGS += -U__$(SUBARCH)__ -fno-builtin USER_CFLAGS += -fno-builtin ELF_ARCH := i386:x86-64 diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index cdf6b6d..4ca2a22 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -16,11 +16,7 @@ semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel -STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) - -# _cflags works with kernel files, not with userspace ones, but c_flags does, -# why ask why? -$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) +$(obj)/stub_segv.o : _c_flags = $(call unprofile,$(CFLAGS)) subdir- := util diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c index 68aeabe..1e88b27 100644 --- a/arch/um/sys-i386/stub_segv.c +++ b/arch/um/sys-i386/stub_segv.c @@ -3,8 +3,7 @@ * Licensed under the GPL */ -#include -#include +#include #include #include "uml-config.h" #include "sysdep/sigcontext.h" diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 32bb7d8..f0ab574d 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -27,11 +27,7 @@ memcpy.S-dir = lib thunk.S-dir = lib module.c-dir = kernel -STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) - -# _cflags works with kernel files, not with userspace ones, but c_flags does, -# why ask why? -$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) +$(obj)/stub_segv.o: _c_flags = $(call unprofile,$(CFLAGS)) subdir- := util diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c index 161d1fe..65a131b 100644 --- a/arch/um/sys-x86_64/stub_segv.c +++ b/arch/um/sys-x86_64/stub_segv.c @@ -3,9 +3,10 @@ * Licensed under the GPL */ -#include +#include #include #include +#include #include "uml-config.h" #include "sysdep/sigcontext.h" #include "sysdep/faultinfo.h" -- cgit v1.1 From 75e5584c89d213d6089f64f22cd899fb172e4c95 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:45 -0700 Subject: [PATCH] uml: use host AIO support This patch makes UML use host AIO support when it (and /usr/include/linux/aio_abi.h) are present. This is only the support, with no consumers - a consumer is coming in the next patch. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/aio.h | 28 ++++ arch/um/include/init.h | 10 +- arch/um/include/irq_kern.h | 3 + arch/um/kernel/irq.c | 41 +++-- arch/um/os-Linux/Makefile | 10 +- arch/um/os-Linux/aio.c | 398 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 475 insertions(+), 15 deletions(-) create mode 100644 arch/um/include/aio.h create mode 100644 arch/um/os-Linux/aio.c (limited to 'arch/um') diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h new file mode 100644 index 0000000..423bae91 --- /dev/null +++ b/arch/um/include/aio.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef AIO_H__ +#define AIO_H__ + +enum aio_type { AIO_READ, AIO_WRITE, AIO_MMAP }; + +struct aio_thread_reply { + void *data; + int err; +}; + +struct aio_context { + int reply_fd; + struct aio_context *next; +}; + +#define INIT_AIO_CONTEXT { .reply_fd = -1, \ + .next = NULL } + +extern int submit_aio(enum aio_type type, int fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio); + +#endif diff --git a/arch/um/include/init.h b/arch/um/include/init.h index 55c2693..cbd79a8 100644 --- a/arch/um/include/init.h +++ b/arch/um/include/init.h @@ -111,7 +111,15 @@ extern struct uml_param __uml_setup_start, __uml_setup_end; #ifndef __KERNEL__ -#define __initcall(fn) static initcall_t __initcall_##fn __init_call = fn +#define __define_initcall(level,fn) \ + static initcall_t __initcall_##fn __attribute_used__ \ + __attribute__((__section__(".initcall" level ".init"))) = fn + +/* Userspace initcalls shouldn't depend on anything in the kernel, so we'll + * make them run first. + */ +#define __initcall(fn) __define_initcall("1", fn) + #define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn #define __init_call __attribute__ ((unused,__section__ (".initcall.init"))) diff --git a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h index 3af52a6..c222d56 100644 --- a/arch/um/include/irq_kern.h +++ b/arch/um/include/irq_kern.h @@ -7,12 +7,15 @@ #define __IRQ_KERN_H__ #include "linux/interrupt.h" +#include "asm/ptrace.h" extern int um_request_irq(unsigned int irq, int fd, int type, irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char * devname, void *dev_id); +extern int init_aio_irq(int irq, char *name, + irqreturn_t (*handler)(int, void *, struct pt_regs *)); #endif diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 9f18061..dcd8149 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -31,7 +31,7 @@ #include "kern_util.h" #include "irq_user.h" #include "irq_kern.h" - +#include "os.h" /* * Generic, controller-independent functions: @@ -168,13 +168,32 @@ void __init init_IRQ(void) } } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *, + struct pt_regs *)) +{ + int fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err){ + printk("init_aio_irq - os_pipe failed, err = %d\n", -err); + goto out; + } + + err = um_request_irq(irq, fds[0], IRQ_READ, handler, + SA_INTERRUPT | SA_SAMPLE_RANDOM, name, + (void *) (long) fds[0]); + if(err){ + printk("init_aio_irq - : um_request_irq failed, err = %d\n", + err); + goto out_close; + } + + err = fds[1]; + goto out; + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + out: + return(err); +} diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 4ddf540..351d969 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -3,11 +3,15 @@ # Licensed under the GPL # -obj-y = elf_aux.o file.o process.o signal.o time.o tty.o user_syms.o drivers/ \ - sys-$(SUBARCH)/ +obj-y = aio.o elf_aux.o file.o process.o signal.o time.o tty.o user_syms.o \ + drivers/ sys-$(SUBARCH)/ -USER_OBJS := elf_aux.o file.o process.o signal.o time.o tty.o +USER_OBJS := aio.o elf_aux.o file.o process.o signal.o time.o tty.o CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) +HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \ + echo -DHAVE_AIO_ABI ) +CFLAGS_aio.o += $(HAVE_AIO_ABI) + include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c new file mode 100644 index 0000000..f2ca299 --- /dev/null +++ b/arch/um/os-Linux/aio.c @@ -0,0 +1,398 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include "os.h" +#include "helper.h" +#include "aio.h" +#include "init.h" +#include "user.h" +#include "mode.h" + +struct aio_thread_req { + enum aio_type type; + int io_fd; + unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; +}; + +static int aio_req_fd_r = -1; +static int aio_req_fd_w = -1; + +#if defined(HAVE_AIO_ABI) +#include + +/* If we have the headers, we are going to build with AIO enabled. + * If we don't have aio in libc, we define the necessary stubs here. + */ + +#if !defined(HAVE_AIO_LIBC) + +static long io_setup(int n, aio_context_t *ctxp) +{ + return syscall(__NR_io_setup, n, ctxp); +} + +static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) +{ + return syscall(__NR_io_submit, ctx, nr, iocbpp); +} + +static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, + struct io_event *events, struct timespec *timeout) +{ + return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); +} + +#endif + +/* The AIO_MMAP cases force the mmapped page into memory here + * rather than in whatever place first touches the data. I used + * to do this by touching the page, but that's delicate because + * gcc is prone to optimizing that away. So, what's done here + * is we read from the descriptor from which the page was + * mapped. The caller is required to pass an offset which is + * inside the page that was mapped. Thus, when the read + * returns, we know that the page is in the page cache, and + * that it now backs the mmapped area. + */ + +static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, + int len, unsigned long long offset, struct aio_context *aio) +{ + struct iocb iocb, *iocbp = &iocb; + char c; + int err; + + iocb = ((struct iocb) { .aio_data = (unsigned long) aio, + .aio_reqprio = 0, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset, + .aio_reserved1 = 0, + .aio_reserved2 = 0, + .aio_reserved3 = 0 }); + + switch(type){ + case AIO_READ: + iocb.aio_lio_opcode = IOCB_CMD_PREAD; + err = io_submit(ctx, 1, &iocbp); + break; + case AIO_WRITE: + iocb.aio_lio_opcode = IOCB_CMD_PWRITE; + err = io_submit(ctx, 1, &iocbp); + break; + case AIO_MMAP: + iocb.aio_lio_opcode = IOCB_CMD_PREAD; + iocb.aio_buf = (unsigned long) &c; + iocb.aio_nbytes = sizeof(c); + err = io_submit(ctx, 1, &iocbp); + break; + default: + printk("Bogus op in do_aio - %d\n", type); + err = -EINVAL; + break; + } + if(err > 0) + err = 0; + + return err; +} + +static aio_context_t ctx = 0; + +static int aio_thread(void *arg) +{ + struct aio_thread_reply reply; + struct io_event event; + int err, n, reply_fd; + + signal(SIGWINCH, SIG_IGN); + + while(1){ + n = io_getevents(ctx, 1, 1, &event, NULL); + if(n < 0){ + if(errno == EINTR) + continue; + printk("aio_thread - io_getevents failed, " + "errno = %d\n", errno); + } + else { + reply = ((struct aio_thread_reply) + { .data = (void *) (long) event.data, + .err = event.res }); + reply_fd = ((struct aio_context *) reply.data)->reply_fd; + err = os_write_file(reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("not_aio_thread - write failed, " + "fd = %d, err = %d\n", + aio_req_fd_r, -err); + } + } + return 0; +} + +#endif + +static int do_not_aio(struct aio_thread_req *req) +{ + char c; + int err; + + switch(req->type){ + case AIO_READ: + err = os_seek_file(req->io_fd, req->offset); + if(err) + goto out; + + err = os_read_file(req->io_fd, req->buf, req->len); + break; + case AIO_WRITE: + err = os_seek_file(req->io_fd, req->offset); + if(err) + goto out; + + err = os_write_file(req->io_fd, req->buf, req->len); + break; + case AIO_MMAP: + err = os_seek_file(req->io_fd, req->offset); + if(err) + goto out; + + err = os_read_file(req->io_fd, &c, sizeof(c)); + break; + default: + printk("do_not_aio - bad request type : %d\n", req->type); + err = -EINVAL; + break; + } + + out: + return err; +} + +static int not_aio_thread(void *arg) +{ + struct aio_thread_req req; + struct aio_thread_reply reply; + int err; + + signal(SIGWINCH, SIG_IGN); + while(1){ + err = os_read_file(aio_req_fd_r, &req, sizeof(req)); + if(err != sizeof(req)){ + if(err < 0) + printk("not_aio_thread - read failed, " + "fd = %d, err = %d\n", aio_req_fd_r, + -err); + else { + printk("not_aio_thread - short read, fd = %d, " + "length = %d\n", aio_req_fd_r, err); + } + continue; + } + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("not_aio_thread - write failed, fd = %d, " + "err = %d\n", aio_req_fd_r, -err); + } +} + +static int aio_pid = -1; + +static int init_aio_24(void) +{ + unsigned long stack; + int fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err) + goto out; + + aio_req_fd_w = fds[0]; + aio_req_fd_r = fds[1]; + err = run_helper_thread(not_aio_thread, NULL, + CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); + if(err < 0) + goto out_close_pipe; + + aio_pid = err; + goto out; + + out_close_pipe: + os_close_file(fds[0]); + os_close_file(fds[1]); + aio_req_fd_w = -1; + aio_req_fd_r = -1; + out: +#ifndef HAVE_AIO_ABI + printk("/usr/include/linux/aio_abi.h not present during build\n"); +#endif + printk("2.6 host AIO support not used - falling back to I/O " + "thread\n"); + return 0; +} + +#ifdef HAVE_AIO_ABI +#define DEFAULT_24_AIO 0 +static int init_aio_26(void) +{ + unsigned long stack; + int err; + + if(io_setup(256, &ctx)){ + printk("aio_thread failed to initialize context, err = %d\n", + errno); + return -errno; + } + + err = run_helper_thread(aio_thread, NULL, + CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); + if(err < 0) + return -errno; + + aio_pid = err; + + printk("Using 2.6 host AIO\n"); + return 0; +} + +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; + + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if(err){ + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + else err = 0; + } + + return err; +} + +#else +#define DEFAULT_24_AIO 1 +static int init_aio_26(void) +{ + return -ENOSYS; +} + +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + return -ENOSYS; +} +#endif + +static int aio_24 = DEFAULT_24_AIO; + +static int __init set_aio_24(char *name, int *add) +{ + aio_24 = 1; + return 0; +} + +__uml_setup("aio=2.4", set_aio_24, +"aio=2.4\n" +" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n" +" available. 2.4 AIO is a single thread that handles one request at a\n" +" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n" +" interface to handle an arbitrary number of pending requests. 2.6 AIO \n" +" is not available in tt mode, on 2.4 hosts, or when UML is built with\n" +" /usr/include/linux/aio_abi.h not available. Many distributions don't\n" +" include aio_abi.h, so you will need to copy it from a kernel tree to\n" +" your /usr/include/linux in order to build an AIO-capable UML\n\n" +); + +static int init_aio(void) +{ + int err; + + CHOOSE_MODE(({ + if(!aio_24){ + printk("Disabling 2.6 AIO in tt mode\n"); + aio_24 = 1; + } }), (void) 0); + + if(!aio_24){ + err = init_aio_26(); + if(err && (errno == ENOSYS)){ + printk("2.6 AIO not supported on the host - " + "reverting to 2.4 AIO\n"); + aio_24 = 1; + } + else return err; + } + + if(aio_24) + return init_aio_24(); + + return 0; +} + +/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio + * needs to be called when the kernel is running because it calls run_helper, + * which needs get_free_page. exit_aio is a __uml_exitcall because the generic + * kernel does not run __exitcalls on shutdown, and can't because many of them + * break when called outside of module unloading. + */ +__initcall(init_aio); + +static void exit_aio(void) +{ + if(aio_pid != -1) + os_kill_process(aio_pid, 1); +} + +__uml_exitcall(exit_aio); + +static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = os_write_file(aio_req_fd_w, &req, sizeof(req)); + if(err == sizeof(req)) + err = 0; + + return err; +} + +int submit_aio(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio) +{ + aio->reply_fd = reply_fd; + if(aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else { + return submit_aio_26(type, io_fd, buf, len, offset, aio); + } +} -- cgit v1.1 From 09ace81c1d737bcbb2423db235ac980cac4d5de9 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:46 -0700 Subject: [PATCH] uml: add host AIO support to block driver This adds AIO support to the ubd driver. The driver breaks a struct request into IO requests to the host, based on the hardware segments in the request and on any COW blocks covered by the request. The ubd IO thread is gone, since there is now an equivalent thread in the AIO module. There is provision for multiple outstanding requests now. Requests aren't retired until all pieces of it have been completed. The AIO requests have a shared count, which is decremented as IO operations come in until it reaches 0. This can be possibly moved to the request struct - haven't looked at this yet. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/Makefile | 2 +- arch/um/drivers/ubd_kern.c | 556 ++++++++++++++++++++------------------------- arch/um/include/aio.h | 18 +- arch/um/os-Linux/aio.c | 206 +++++++++-------- 4 files changed, 373 insertions(+), 409 deletions(-) (limited to 'arch/um') diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index de17d4c..783e18c 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -13,7 +13,7 @@ mcast-objs := mcast_kern.o mcast_user.o net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o -ubd-objs := ubd_kern.o ubd_user.o +ubd-objs := ubd_kern.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index f731343..e77a38d 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -35,6 +35,7 @@ #include "linux/blkpg.h" #include "linux/genhd.h" #include "linux/spinlock.h" +#include "asm/atomic.h" #include "asm/segment.h" #include "asm/uaccess.h" #include "asm/irq.h" @@ -53,20 +54,21 @@ #include "mem.h" #include "mem_kern.h" #include "cow.h" +#include "aio.h" enum ubd_req { UBD_READ, UBD_WRITE }; struct io_thread_req { - enum ubd_req op; + enum aio_type op; int fds[2]; unsigned long offsets[2]; unsigned long long offset; unsigned long length; char *buffer; int sectorsize; - unsigned long sector_mask; - unsigned long long cow_offset; - unsigned long bitmap_words[2]; + int bitmap_offset; + long bitmap_start; + long bitmap_end; int error; }; @@ -80,28 +82,31 @@ extern int create_cow_file(char *cow_file, char *backing_file, unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -extern void do_io(struct io_thread_req *req); +extern void do_io(struct io_thread_req *req, struct request *r, + unsigned long *bitmap); -static inline int ubd_test_bit(__u64 bit, unsigned char *data) +static inline int ubd_test_bit(__u64 bit, void *data) { + unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(data[0]) * 8; + bits = sizeof(buffer[0]) * 8; n = bit / bits; off = bit % bits; - return((data[n] & (1 << off)) != 0); + return((buffer[n] & (1 << off)) != 0); } -static inline void ubd_set_bit(__u64 bit, unsigned char *data) +static inline void ubd_set_bit(__u64 bit, void *data) { + unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(data[0]) * 8; + bits = sizeof(buffer[0]) * 8; n = bit / bits; off = bit % bits; - data[n] |= (1 << off); + buffer[n] |= (1 << off); } /*End stuff from ubd_user.h*/ @@ -110,8 +115,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data) static DEFINE_SPINLOCK(ubd_io_lock); static DEFINE_SPINLOCK(ubd_lock); -static void (*do_ubd)(void); - static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); static int ubd_ioctl(struct inode * inode, struct file * file, @@ -158,6 +161,8 @@ struct cow { int data_offset; }; +#define MAX_SG 64 + struct ubd { char *file; int count; @@ -168,6 +173,7 @@ struct ubd { int no_cow; struct cow cow; struct platform_device pdev; + struct scatterlist sg[MAX_SG]; }; #define DEFAULT_COW { \ @@ -460,80 +466,113 @@ __uml_help(fakehd, ); static void do_ubd_request(request_queue_t * q); - -/* Only changed by ubd_init, which is an initcall. */ -int thread_fd = -1; +static int in_ubd; /* Changed by ubd_handler, which is serialized because interrupts only * happen on CPU 0. */ int intr_count = 0; -/* call ubd_finish if you need to serialize */ -static void __ubd_finish(struct request *req, int error) +static void ubd_end_request(struct request *req, int bytes, int uptodate) { - int nsect; - - if(error){ - end_request(req, 0); - return; + if (!end_that_request_first(req, uptodate, bytes >> 9)) { + add_disk_randomness(req->rq_disk); + end_that_request_last(req); } - nsect = req->current_nr_sectors; - req->sector += nsect; - req->buffer += nsect << 9; - req->errors = 0; - req->nr_sectors -= nsect; - req->current_nr_sectors = 0; - end_request(req, 1); } -static inline void ubd_finish(struct request *req, int error) +/* call ubd_finish if you need to serialize */ +static void __ubd_finish(struct request *req, int bytes) { - spin_lock(&ubd_io_lock); - __ubd_finish(req, error); - spin_unlock(&ubd_io_lock); + if(bytes < 0){ + ubd_end_request(req, 0, 0); + return; + } + + ubd_end_request(req, bytes, 1); } -/* Called without ubd_io_lock held */ -static void ubd_handler(void) +static inline void ubd_finish(struct request *req, int bytes) { - struct io_thread_req req; - struct request *rq = elv_next_request(ubd_queue); - int n; - - do_ubd = NULL; - intr_count++; - n = os_read_file(thread_fd, &req, sizeof(req)); - if(n != sizeof(req)){ - printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " - "err = %d\n", os_getpid(), -n); - spin_lock(&ubd_io_lock); - end_request(rq, 0); - spin_unlock(&ubd_io_lock); - return; - } - - ubd_finish(rq, req.error); - reactivate_fd(thread_fd, UBD_IRQ); - do_ubd_request(ubd_queue); + spin_lock(&ubd_io_lock); + __ubd_finish(req, bytes); + spin_unlock(&ubd_io_lock); } +struct bitmap_io { + atomic_t count; + struct aio_context aio; +}; + +struct ubd_aio { + struct aio_context aio; + struct request *req; + int len; + struct bitmap_io *bitmap; + void *bitmap_buf; +}; + +static int ubd_reply_fd = -1; + static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) { - ubd_handler(); - return(IRQ_HANDLED); -} + struct aio_thread_reply reply; + struct ubd_aio *aio; + struct request *req; + int err, n, fd = (int) (long) dev; -/* Only changed by ubd_init, which is an initcall. */ -static int io_pid = -1; + while(1){ + err = os_read_file(fd, &reply, sizeof(reply)); + if(err == -EAGAIN) + break; + if(err < 0){ + printk("ubd_aio_handler - read returned err %d\n", + -err); + break; + } -void kill_io_thread(void) -{ - if(io_pid != -1) - os_kill_process(io_pid, 1); -} + aio = container_of(reply.data, struct ubd_aio, aio); + n = reply.err; + + if(n == 0){ + req = aio->req; + req->nr_sectors -= aio->len >> 9; -__uml_exitcall(kill_io_thread); + if((aio->bitmap != NULL) && + (atomic_dec_and_test(&aio->bitmap->count))){ + aio->aio = aio->bitmap->aio; + aio->len = 0; + kfree(aio->bitmap); + aio->bitmap = NULL; + submit_aio(&aio->aio); + } + else { + if((req->nr_sectors == 0) && + (aio->bitmap == NULL)){ + int len = req->hard_nr_sectors << 9; + ubd_finish(req, len); + } + + if(aio->bitmap_buf != NULL) + kfree(aio->bitmap_buf); + kfree(aio); + } + } + else if(n < 0){ + ubd_finish(aio->req, n); + if(aio->bitmap != NULL) + kfree(aio->bitmap); + if(aio->bitmap_buf != NULL) + kfree(aio->bitmap_buf); + kfree(aio); + } + } + reactivate_fd(fd, UBD_IRQ); + + do_ubd_request(ubd_queue); + + return(IRQ_HANDLED); +} static int ubd_file_size(struct ubd *dev, __u64 *size_out) { @@ -569,7 +608,7 @@ static int ubd_open_dev(struct ubd *dev) &dev->cow.data_offset, create_ptr); if((dev->fd == -ENOENT) && create_cow){ - dev->fd = create_cow_file(dev->file, dev->cow.file, + dev->fd = create_cow_file(dev->file, dev->cow.file, dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, @@ -831,6 +870,10 @@ int ubd_init(void) { int i; + ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr); + if(ubd_reply_fd < 0) + printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd); + devfs_mk_dir("ubd"); if (register_blkdev(MAJOR_NR, "ubd")) return -1; @@ -841,6 +884,7 @@ int ubd_init(void) return -1; } + blk_queue_max_hw_segments(ubd_queue, MAX_SG); if (fake_major != MAJOR_NR) { char name[sizeof("ubd_nnn\0")]; @@ -852,40 +896,12 @@ int ubd_init(void) driver_register(&ubd_driver); for (i = 0; i < MAX_DEV; i++) ubd_add(i); + return 0; } late_initcall(ubd_init); -int ubd_driver_init(void){ - unsigned long stack; - int err; - - /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ - if(global_openflags.s){ - printk(KERN_INFO "ubd: Synchronous mode\n"); - /* Letting ubd=sync be like using ubd#s= instead of ubd#= is - * enough. So use anyway the io thread. */ - } - stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), - &thread_fd); - if(io_pid < 0){ - printk(KERN_ERR - "ubd : Failed to start I/O thread (errno = %d) - " - "falling back to synchronous I/O\n", -io_pid); - io_pid = -1; - return(0); - } - err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, - SA_INTERRUPT, "ubd", ubd_dev); - if(err != 0) - printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); - return(err); -} - -device_initcall(ubd_driver_init); - static int ubd_open(struct inode *inode, struct file *filp) { struct gendisk *disk = inode->i_bdev->bd_disk; @@ -923,105 +939,55 @@ static int ubd_release(struct inode * inode, struct file * file) return(0); } -static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, - __u64 *cow_offset, unsigned long *bitmap, - __u64 bitmap_offset, unsigned long *bitmap_words, - __u64 bitmap_len) +static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap) { - __u64 sector = io_offset >> 9; - int i, update_bitmap = 0; - - for(i = 0; i < length >> 9; i++){ - if(cow_mask != NULL) - ubd_set_bit(i, (unsigned char *) cow_mask); - if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) - continue; - - update_bitmap = 1; - ubd_set_bit(sector + i, (unsigned char *) bitmap); - } - - if(!update_bitmap) - return; - - *cow_offset = sector / (sizeof(unsigned long) * 8); - - /* This takes care of the case where we're exactly at the end of the - * device, and *cow_offset + 1 is off the end. So, just back it up - * by one word. Thanks to Lynn Kerby for the fix and James McMechan - * for the original diagnosis. - */ - if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / - sizeof(unsigned long) - 1)) - (*cow_offset)--; - - bitmap_words[0] = bitmap[*cow_offset]; - bitmap_words[1] = bitmap[*cow_offset + 1]; - - *cow_offset *= sizeof(unsigned long); - *cow_offset += bitmap_offset; -} + __u64 sector = req->offset / req->sectorsize; + int i; -static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, - __u64 bitmap_offset, __u64 bitmap_len) -{ - __u64 sector = req->offset >> 9; - int i; + for(i = 0; i < req->length / req->sectorsize; i++){ + if(ubd_test_bit(sector + i, bitmap)) + continue; - if(req->length > (sizeof(req->sector_mask) * 8) << 9) - panic("Operation too long"); + if(req->bitmap_start == -1) + req->bitmap_start = sector + i; + req->bitmap_end = sector + i + 1; - if(req->op == UBD_READ) { - for(i = 0; i < req->length >> 9; i++){ - if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) - ubd_set_bit(i, (unsigned char *) - &req->sector_mask); - } - } - else cowify_bitmap(req->offset, req->length, &req->sector_mask, - &req->cow_offset, bitmap, bitmap_offset, - req->bitmap_words, bitmap_len); + ubd_set_bit(sector + i, bitmap); + } } /* Called with ubd_io_lock held */ -static int prepare_request(struct request *req, struct io_thread_req *io_req) +static int prepare_request(struct request *req, struct io_thread_req *io_req, + unsigned long long offset, int page_offset, + int len, struct page *page) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; - __u64 offset; - int len; - - if(req->rq_status == RQ_INACTIVE) return(1); /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); - end_request(req, 0); + ubd_end_request(req, 0, 0); return(1); } - offset = ((__u64) req->sector) << 9; - len = req->current_nr_sectors << 9; - io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; - io_req->cow_offset = -1; io_req->offset = offset; io_req->length = len; io_req->error = 0; - io_req->sector_mask = 0; - - io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; + io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->buffer = req->buffer; + io_req->buffer = page_address(page) + page_offset; io_req->sectorsize = 1 << 9; + io_req->bitmap_offset = dev->cow.bitmap_offset; + io_req->bitmap_start = -1; + io_req->bitmap_end = -1; - if(dev->cow.file != NULL) - cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, - dev->cow.bitmap_len); - + if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE)) + cowify_bitmap(io_req, dev->cow.bitmap); return(0); } @@ -1030,30 +996,36 @@ static void do_ubd_request(request_queue_t *q) { struct io_thread_req io_req; struct request *req; - int err, n; - - if(thread_fd == -1){ - while((req = elv_next_request(q)) != NULL){ - err = prepare_request(req, &io_req); - if(!err){ - do_io(&io_req); - __ubd_finish(req, io_req.error); - } - } - } - else { - if(do_ubd || (req = elv_next_request(q)) == NULL) - return; - err = prepare_request(req, &io_req); - if(!err){ - do_ubd = ubd_handler; - n = os_write_file(thread_fd, (char *) &io_req, - sizeof(io_req)); - if(n != sizeof(io_req)) - printk("write to io thread failed, " - "errno = %d\n", -n); + __u64 sector; + int err; + + if(in_ubd) + return; + in_ubd = 1; + while((req = elv_next_request(q)) != NULL){ + struct gendisk *disk = req->rq_disk; + struct ubd *dev = disk->private_data; + int n, i; + + blkdev_dequeue_request(req); + + sector = req->sector; + n = blk_rq_map_sg(q, req, dev->sg); + + for(i = 0; i < n; i++){ + struct scatterlist *sg = &dev->sg[i]; + + err = prepare_request(req, &io_req, sector << 9, + sg->offset, sg->length, + sg->page); + if(err) + continue; + + sector += sg->length >> 9; + do_io(&io_req, req, dev->cow.bitmap); } } + in_ubd = 0; } static int ubd_ioctl(struct inode * inode, struct file * file, @@ -1269,131 +1241,95 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, return(err); } -static int update_bitmap(struct io_thread_req *req) +void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap) { - int n; - - if(req->cow_offset == -1) - return(0); - - n = os_seek_file(req->fds[1], req->cow_offset); - if(n < 0){ - printk("do_io - bitmap lseek failed : err = %d\n", -n); - return(1); - } - - n = os_write_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words)); - if(n != sizeof(req->bitmap_words)){ - printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, - req->fds[1]); - return(1); - } - - return(0); -} - -void do_io(struct io_thread_req *req) -{ - char *buf; - unsigned long len; - int n, nsectors, start, end, bit; - int err; - __u64 off; - - nsectors = req->length / req->sectorsize; - start = 0; - do { - bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); - end = start; - while((end < nsectors) && - (ubd_test_bit(end, (unsigned char *) - &req->sector_mask) == bit)) - end++; - - off = req->offset + req->offsets[bit] + - start * req->sectorsize; - len = (end - start) * req->sectorsize; - buf = &req->buffer[start * req->sectorsize]; - - err = os_seek_file(req->fds[bit], off); - if(err < 0){ - printk("do_io - lseek failed : err = %d\n", -err); - req->error = 1; - return; - } - if(req->op == UBD_READ){ - n = 0; - do { - buf = &buf[n]; - len -= n; - n = os_read_file(req->fds[bit], buf, len); - if (n < 0) { - printk("do_io - read failed, err = %d " - "fd = %d\n", -n, req->fds[bit]); - req->error = 1; - return; - } - } while((n < len) && (n != 0)); - if (n < len) memset(&buf[n], 0, len - n); - } else { - n = os_write_file(req->fds[bit], buf, len); - if(n != len){ - printk("do_io - write failed err = %d " - "fd = %d\n", -n, req->fds[bit]); - req->error = 1; - return; - } - } + struct ubd_aio *aio; + struct bitmap_io *bitmap_io = NULL; + char *buf; + void *bitmap_buf = NULL; + unsigned long len, sector; + int nsectors, start, end, bit, err; + __u64 off; + + if(req->bitmap_start != -1){ + /* Round up to the nearest word */ + int round = sizeof(unsigned long); + len = (req->bitmap_end - req->bitmap_start + + round * 8 - 1) / (round * 8); + len *= round; + + off = req->bitmap_start / (8 * round); + off *= round; + + bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL); + if(bitmap_io == NULL){ + printk("Failed to kmalloc bitmap IO\n"); + req->error = 1; + return; + } - start = end; - } while(start < nsectors); + bitmap_buf = kmalloc(len, GFP_KERNEL); + if(bitmap_buf == NULL){ + printk("do_io : kmalloc of bitmap chunk " + "failed\n"); + kfree(bitmap_io); + req->error = 1; + return; + } + memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len); + + *bitmap_io = ((struct bitmap_io) + { .count = ATOMIC_INIT(0), + .aio = INIT_AIO(AIO_WRITE, req->fds[1], + bitmap_buf, len, + req->bitmap_offset + off, + ubd_reply_fd) } ); + } - req->error = update_bitmap(req); -} + nsectors = req->length / req->sectorsize; + start = 0; + end = nsectors; + bit = 0; + do { + if(bitmap != NULL){ + sector = req->offset / req->sectorsize; + bit = ubd_test_bit(sector + start, bitmap); + end = start; + while((end < nsectors) && + (ubd_test_bit(sector + end, bitmap) == bit)) + end++; + } -/* Changed in start_io_thread, which is serialized by being called only - * from ubd_init, which is an initcall. - */ -int kernel_fd = -1; + off = req->offsets[bit] + req->offset + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * req->sectorsize]; -/* Only changed by the io thread */ -int io_count = 0; + aio = kmalloc(sizeof(*aio), GFP_KERNEL); + if(aio == NULL){ + req->error = 1; + return; + } -int io_thread(void *arg) -{ - struct io_thread_req req; - int n; + *aio = ((struct ubd_aio) + { .aio = INIT_AIO(req->op, req->fds[bit], buf, + len, off, ubd_reply_fd), + .len = len, + .req = r, + .bitmap = bitmap_io, + .bitmap_buf = bitmap_buf }); + + if(aio->bitmap != NULL) + atomic_inc(&aio->bitmap->count); + + err = submit_aio(&aio->aio); + if(err){ + printk("do_io - submit_aio failed, " + "err = %d\n", err); + req->error = 1; + return; + } - ignore_sigwinch_sig(); - while(1){ - n = os_read_file(kernel_fd, &req, sizeof(req)); - if(n != sizeof(req)){ - if(n < 0) - printk("io_thread - read failed, fd = %d, " - "err = %d\n", kernel_fd, -n); - else { - printk("io_thread - short read, fd = %d, " - "length = %d\n", kernel_fd, n); - } - continue; - } - io_count++; - do_io(&req); - n = os_write_file(kernel_fd, &req, sizeof(req)); - if(n != sizeof(req)) - printk("io_thread - write failed, fd = %d, err = %d\n", - kernel_fd, -n); - } + start = end; + } while(start < nsectors); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h index 423bae91..83f1687 100644 --- a/arch/um/include/aio.h +++ b/arch/um/include/aio.h @@ -14,15 +14,27 @@ struct aio_thread_reply { }; struct aio_context { + enum aio_type type; + int fd; + void *data; + int len; + unsigned long long offset; int reply_fd; struct aio_context *next; }; +#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \ + aio_reply_fd) \ + { .type = aio_type, \ + .fd = aio_fd, \ + .data = aio_data, \ + .len = aio_len, \ + .offset = aio_offset, \ + .reply_fd = aio_reply_fd } + #define INIT_AIO_CONTEXT { .reply_fd = -1, \ .next = NULL } -extern int submit_aio(enum aio_type type, int fd, char *buf, int len, - unsigned long long offset, int reply_fd, - struct aio_context *aio); +extern int submit_aio(struct aio_context *aio); #endif diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c index f2ca299..b04897c 100644 --- a/arch/um/os-Linux/aio.c +++ b/arch/um/os-Linux/aio.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -16,18 +17,31 @@ #include "user.h" #include "mode.h" -struct aio_thread_req { - enum aio_type type; - int io_fd; - unsigned long long offset; - char *buf; - int len; - struct aio_context *aio; -}; - static int aio_req_fd_r = -1; static int aio_req_fd_w = -1; +static int update_aio(struct aio_context *aio, int res) +{ + if(res < 0) + aio->len = res; + else if((res == 0) && (aio->type == AIO_READ)){ + /* This is the EOF case - we have hit the end of the file + * and it ends in a partial block, so we fill the end of + * the block with zeros and claim success. + */ + memset(aio->data, 0, aio->len); + aio->len = 0; + } + else if(res > 0){ + aio->len -= res; + aio->data += res; + aio->offset += res; + return aio->len; + } + + return 0; +} + #if defined(HAVE_AIO_ABI) #include @@ -66,8 +80,7 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, * that it now backs the mmapped area. */ -static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, - int len, unsigned long long offset, struct aio_context *aio) +static int do_aio(aio_context_t ctx, struct aio_context *aio) { struct iocb iocb, *iocbp = &iocb; char c; @@ -75,37 +88,37 @@ static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, iocb = ((struct iocb) { .aio_data = (unsigned long) aio, .aio_reqprio = 0, - .aio_fildes = fd, - .aio_buf = (unsigned long) buf, - .aio_nbytes = len, - .aio_offset = offset, + .aio_fildes = aio->fd, + .aio_buf = (unsigned long) aio->data, + .aio_nbytes = aio->len, + .aio_offset = aio->offset, .aio_reserved1 = 0, .aio_reserved2 = 0, .aio_reserved3 = 0 }); - switch(type){ + switch(aio->type){ case AIO_READ: iocb.aio_lio_opcode = IOCB_CMD_PREAD; - err = io_submit(ctx, 1, &iocbp); break; case AIO_WRITE: iocb.aio_lio_opcode = IOCB_CMD_PWRITE; - err = io_submit(ctx, 1, &iocbp); break; case AIO_MMAP: iocb.aio_lio_opcode = IOCB_CMD_PREAD; iocb.aio_buf = (unsigned long) &c; iocb.aio_nbytes = sizeof(c); - err = io_submit(ctx, 1, &iocbp); break; default: - printk("Bogus op in do_aio - %d\n", type); + printk("Bogus op in do_aio - %d\n", aio->type); err = -EINVAL; - break; + goto out; } + + err = io_submit(ctx, 1, &iocbp); if(err > 0) err = 0; + out: return err; } @@ -114,8 +127,9 @@ static aio_context_t ctx = 0; static int aio_thread(void *arg) { struct aio_thread_reply reply; + struct aio_context *aio; struct io_event event; - int err, n, reply_fd; + int err, n; signal(SIGWINCH, SIG_IGN); @@ -128,15 +142,21 @@ static int aio_thread(void *arg) "errno = %d\n", errno); } else { + aio = (struct aio_context *) event.data; + if(update_aio(aio, event.res)){ + do_aio(ctx, aio); + continue; + } + reply = ((struct aio_thread_reply) - { .data = (void *) (long) event.data, - .err = event.res }); - reply_fd = ((struct aio_context *) reply.data)->reply_fd; - err = os_write_file(reply_fd, &reply, sizeof(reply)); + { .data = aio, + .err = aio->len }); + err = os_write_file(aio->reply_fd, &reply, + sizeof(reply)); if(err != sizeof(reply)) - printk("not_aio_thread - write failed, " - "fd = %d, err = %d\n", - aio_req_fd_r, -err); + printk("aio_thread - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, + -err); } } return 0; @@ -144,35 +164,35 @@ static int aio_thread(void *arg) #endif -static int do_not_aio(struct aio_thread_req *req) +static int do_not_aio(struct aio_context *aio) { char c; int err; - switch(req->type){ + switch(aio->type){ case AIO_READ: - err = os_seek_file(req->io_fd, req->offset); + err = os_seek_file(aio->fd, aio->offset); if(err) goto out; - err = os_read_file(req->io_fd, req->buf, req->len); + err = os_read_file(aio->fd, aio->data, aio->len); break; case AIO_WRITE: - err = os_seek_file(req->io_fd, req->offset); + err = os_seek_file(aio->fd, aio->offset); if(err) goto out; - err = os_write_file(req->io_fd, req->buf, req->len); + err = os_write_file(aio->fd, aio->data, aio->len); break; case AIO_MMAP: - err = os_seek_file(req->io_fd, req->offset); + err = os_seek_file(aio->fd, aio->offset); if(err) goto out; - err = os_read_file(req->io_fd, &c, sizeof(c)); + err = os_read_file(aio->fd, &c, sizeof(c)); break; default: - printk("do_not_aio - bad request type : %d\n", req->type); + printk("do_not_aio - bad request type : %d\n", aio->type); err = -EINVAL; break; } @@ -183,14 +203,14 @@ static int do_not_aio(struct aio_thread_req *req) static int not_aio_thread(void *arg) { - struct aio_thread_req req; + struct aio_context *aio; struct aio_thread_reply reply; int err; signal(SIGWINCH, SIG_IGN); while(1){ - err = os_read_file(aio_req_fd_r, &req, sizeof(req)); - if(err != sizeof(req)){ + err = os_read_file(aio_req_fd_r, &aio, sizeof(aio)); + if(err != sizeof(aio)){ if(err < 0) printk("not_aio_thread - read failed, " "fd = %d, err = %d\n", aio_req_fd_r, @@ -201,17 +221,34 @@ static int not_aio_thread(void *arg) } continue; } - err = do_not_aio(&req); - reply = ((struct aio_thread_reply) { .data = req.aio, - .err = err }); - err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); + again: + err = do_not_aio(aio); + + if(update_aio(aio, err)) + goto again; + + reply = ((struct aio_thread_reply) { .data = aio, + .err = aio->len }); + err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) printk("not_aio_thread - write failed, fd = %d, " "err = %d\n", aio_req_fd_r, -err); } } +static int submit_aio_24(struct aio_context *aio) +{ + int err; + + err = os_write_file(aio_req_fd_w, &aio, sizeof(aio)); + if(err == sizeof(aio)) + err = 0; + + return err; +} + static int aio_pid = -1; +static int (*submit_proc)(struct aio_context *aio); static int init_aio_24(void) { @@ -243,11 +280,33 @@ static int init_aio_24(void) #endif printk("2.6 host AIO support not used - falling back to I/O " "thread\n"); + + submit_proc = submit_aio_24; + return 0; } #ifdef HAVE_AIO_ABI #define DEFAULT_24_AIO 0 +static int submit_aio_26(struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; + + err = do_aio(ctx, aio); + if(err){ + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + else err = 0; + } + + return err; +} + static int init_aio_26(void) { unsigned long stack; @@ -267,39 +326,22 @@ static int init_aio_26(void) aio_pid = err; printk("Using 2.6 host AIO\n"); - return 0; -} - -static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, - unsigned long long offset, struct aio_context *aio) -{ - struct aio_thread_reply reply; - int err; - err = do_aio(ctx, type, io_fd, buf, len, offset, aio); - if(err){ - reply = ((struct aio_thread_reply) { .data = aio, - .err = err }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("submit_aio_26 - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, -err); - else err = 0; - } + submit_proc = submit_aio_26; - return err; + return 0; } #else #define DEFAULT_24_AIO 1 -static int init_aio_26(void) +static int submit_aio_26(struct aio_context *aio) { return -ENOSYS; } -static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, - unsigned long long offset, struct aio_context *aio) +static int init_aio_26(void) { + submit_proc = submit_aio_26; return -ENOSYS; } #endif @@ -366,33 +408,7 @@ static void exit_aio(void) __uml_exitcall(exit_aio); -static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, - unsigned long long offset, struct aio_context *aio) +int submit_aio(struct aio_context *aio) { - struct aio_thread_req req = { .type = type, - .io_fd = io_fd, - .offset = offset, - .buf = buf, - .len = len, - .aio = aio, - }; - int err; - - err = os_write_file(aio_req_fd_w, &req, sizeof(req)); - if(err == sizeof(req)) - err = 0; - - return err; -} - -int submit_aio(enum aio_type type, int io_fd, char *buf, int len, - unsigned long long offset, int reply_fd, - struct aio_context *aio) -{ - aio->reply_fd = reply_fd; - if(aio_24) - return submit_aio_24(type, io_fd, buf, len, offset, aio); - else { - return submit_aio_26(type, io_fd, buf, len, offset, aio); - } + return (*submit_proc)(aio); } -- cgit v1.1 From 60d339f6fe0831060600c62418b71a62ad26c281 Mon Sep 17 00:00:00 2001 From: Gennady Sharapov Date: Sat, 3 Sep 2005 15:57:47 -0700 Subject: [PATCH] uml: move libc-dependent startup and signal code The serial UML OS-abstraction layer patch (um/kernel dir). This moves all systemcalls from process.c file under os-Linux dir and join process.c and process_kern.c files. Signed-off-by: Gennady Sharapov Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/os.h | 15 ++ arch/um/include/user_util.h | 8 - arch/um/kernel/Makefile | 13 +- arch/um/kernel/process.c | 439 -------------------------------------------- arch/um/kernel/um_arch.c | 2 +- arch/um/os-Linux/Makefile | 7 +- arch/um/os-Linux/process.c | 58 +++++- arch/um/os-Linux/start_up.c | 329 +++++++++++++++++++++++++++++++++ arch/um/os-Linux/tt.c | 113 ++++++++++++ 9 files changed, 523 insertions(+), 461 deletions(-) delete mode 100644 arch/um/kernel/process.c create mode 100644 arch/um/os-Linux/start_up.c create mode 100644 arch/um/os-Linux/tt.c (limited to 'arch/um') diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 881d298..4c36245 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -153,6 +153,11 @@ extern int os_file_type(char *file); extern int os_file_mode(char *file, struct openflags *mode_out); extern int os_lock_file(int fd, int excl); +/* start_up.c */ +extern void os_early_checks(void); +extern int can_do_skas(void); + +/* process.c */ extern unsigned long os_process_pc(int pid); extern int os_process_parent(int pid); extern void os_stop_process(int pid); @@ -161,6 +166,9 @@ extern void os_kill_ptraced_process(int pid, int reap_child); extern void os_usr1_process(int pid); extern int os_getpid(void); extern int os_getpgrp(void); +extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); +extern void init_new_thread_signals(int altstack); +extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); extern int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x); @@ -170,6 +178,13 @@ extern int os_unmap_memory(void *addr, int len); extern void os_flush_stdout(void); extern unsigned long long os_usecs(void); +/* tt.c + * for tt mode only (will be deleted in future...) + */ +extern void forward_pending_sigio(int target); +extern int start_fork_tramp(void *arg, unsigned long temp_stack, + int clone_flags, int (*tramp)(void *)); + #endif /* diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h index 7b6a24d..bb505e0 100644 --- a/arch/um/include/user_util.h +++ b/arch/um/include/user_util.h @@ -54,8 +54,6 @@ extern void stack_protections(unsigned long address); extern void task_protections(unsigned long address); extern int wait_for_stop(int pid, int sig, int cont_type, void *relay); extern void *add_signal_handler(int sig, void (*handler)(int)); -extern int start_fork_tramp(void *arg, unsigned long temp_stack, - int clone_flags, int (*tramp)(void *)); extern int linux_main(int argc, char **argv); extern void set_cmdline(char *cmd); extern void input_cb(void (*proc)(void *), void *arg, int arg_len); @@ -64,8 +62,6 @@ extern void *um_kmalloc(int size); extern int switcheroo(int fd, int prot, void *from, void *to, int size); extern void setup_machinename(char *machine_out); extern void setup_hostinfo(void); -extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); -extern void init_new_thread_signals(int altstack); extern void do_exec(int old_pid, int new_pid); extern void tracer_panic(char *msg, ...); extern char *get_umid(int only_if_set); @@ -74,16 +70,12 @@ extern int detach(int pid, int sig); extern int attach(int pid); extern void kill_child_dead(int pid); extern int cont(int pid); -extern void check_ptrace(void); extern void check_sigio(void); -extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); extern void write_sigio_workaround(void); extern void arch_check_bugs(void); extern int cpu_feature(char *what, char *buf, int len); extern int arch_handle_signal(int sig, union uml_pt_regs *regs); extern int arch_fixup(unsigned long address, void *sc_ptr); -extern void forward_pending_sigio(int target); -extern int can_do_skas(void); extern void arch_init_thread(void); extern int setjmp_wrapper(void (*proc)(void *, void *), ...); extern int raw(int fd); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index a9cdd00..614b8eb 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -8,11 +8,10 @@ clean-files := obj-y = config.o exec_kern.o exitcode.o \ helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \ - physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ - sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ - syscall_kern.o sysrq.o tempfile.o time.o time_kern.o \ - tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \ - user_util.o + physmem.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ + sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o \ + tempfile.o time.o time_kern.o tlb.o trap_kern.o trap_user.o \ + uaccess_user.o um_arch.o umid.o user_util.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o @@ -25,8 +24,8 @@ obj-$(CONFIG_MODE_SKAS) += skas/ user-objs-$(CONFIG_TTY_LOG) += tty_log.o -USER_OBJS := $(user-objs-y) config.o helper.o main.o process.o tempfile.o \ - time.o tty_log.o umid.o user_util.o +USER_OBJS := $(user-objs-y) config.o helper.o main.o tempfile.o time.o \ + tty_log.o umid.o user_util.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c deleted file mode 100644 index 67acd92..0000000 --- a/arch/um/kernel/process.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "process.h" -#include "signal_kern.h" -#include "signal_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "irq_user.h" -#include "ptrace_user.h" -#include "time_user.h" -#include "init.h" -#include "os.h" -#include "uml-config.h" -#include "choose-mode.h" -#include "mode.h" -#include "tempfile.h" -#ifdef UML_CONFIG_MODE_SKAS -#include "skas.h" -#include "skas_ptrace.h" -#include "registers.h" -#endif - -void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) -{ - int flags = 0, pages; - - if(sig_stack != NULL){ - pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER); - set_sigstack(sig_stack, pages * page_size()); - flags = SA_ONSTACK; - } - if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); -} - -void init_new_thread_signals(int altstack) -{ - int flags = altstack ? SA_ONSTACK : 0; - - set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGILL, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGUSR2, (__sighandler_t) sig_handler, - flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - signal(SIGHUP, SIG_IGN); - - init_irq_signals(altstack); -} - -struct tramp { - int (*tramp)(void *); - void *tramp_data; - unsigned long temp_stack; - int flags; - int pid; -}; - -/* See above for why sigkill is here */ - -int sigkill = SIGKILL; - -int outer_tramp(void *arg) -{ - struct tramp *t; - int sig = sigkill; - - t = arg; - t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, - t->flags, t->tramp_data); - if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); - kill(os_getpid(), sig); - _exit(0); -} - -int start_fork_tramp(void *thread_arg, unsigned long temp_stack, - int clone_flags, int (*tramp)(void *)) -{ - struct tramp arg; - unsigned long sp; - int new_pid, status, err; - - /* The trampoline will run on the temporary stack */ - sp = stack_sp(temp_stack); - - clone_flags |= CLONE_FILES | SIGCHLD; - - arg.tramp = tramp; - arg.tramp_data = thread_arg; - arg.temp_stack = temp_stack; - arg.flags = clone_flags; - - /* Start the process and wait for it to kill itself */ - new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); - if(new_pid < 0) - return(new_pid); - - CATCH_EINTR(err = waitpid(new_pid, &status, 0)); - if(err < 0) - panic("Waiting for outer trampoline failed - errno = %d", - errno); - - if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) - panic("outer trampoline didn't exit with SIGKILL, " - "status = %d", status); - - return(arg.pid); -} - -static int ptrace_child(void *arg) -{ - int ret; - int pid = os_getpid(), ppid = getppid(); - int sc_result; - - if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ - perror("ptrace"); - os_kill_process(pid, 0); - } - os_stop_process(pid); - - /*This syscall will be intercepted by the parent. Don't call more than - * once, please.*/ - sc_result = os_getpid(); - - if (sc_result == pid) - ret = 1; /*Nothing modified by the parent, we are running - normally.*/ - else if (sc_result == ppid) - ret = 0; /*Expected in check_ptrace and check_sysemu when they - succeed in modifying the stack frame*/ - else - ret = 2; /*Serious trouble! This could be caused by a bug in - host 2.6 SKAS3/2.6 patch before release -V6, together - with a bug in the UML code itself.*/ - _exit(ret); -} - -static int start_ptraced_child(void **stack_out) -{ - void *stack; - unsigned long sp; - int pid, n, status; - - stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if(stack == MAP_FAILED) - panic("check_ptrace : mmap failed, errno = %d", errno); - sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); - if(pid < 0) - panic("check_ptrace : clone failed, errno = %d", errno); - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) - panic("check_ptrace : expected SIGSTOP, got status = %d", - status); - - *stack_out = stack; - return(pid); -} - -/* When testing for SYSEMU support, if it is one of the broken versions, we must - * just avoid using sysemu, not panic, but only if SYSEMU features are broken. - * So only for SYSEMU features we test mustpanic, while normal host features - * must work anyway!*/ -static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic) -{ - int status, n, ret = 0; - - if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", errno); - CATCH_EINTR(n = waitpid(pid, &status, 0)); - if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { - int exit_with = WEXITSTATUS(status); - if (exit_with == 2) - printk("check_ptrace : child exited with status 2. " - "Serious trouble happening! Try updating your " - "host skas patch!\nDisabling SYSEMU support."); - printk("check_ptrace : child exited with exitcode %d, while " - "expecting %d; status 0x%x", exit_with, - exitcode, status); - if (mustpanic) - panic("\n"); - else - printk("\n"); - ret = -1; - } - - if(munmap(stack, PAGE_SIZE) < 0) - panic("check_ptrace : munmap failed, errno = %d", errno); - return ret; -} - -static int force_sysemu_disabled = 0; - -int ptrace_faultinfo = 1; -int proc_mm = 1; - -static int __init skas0_cmd_param(char *str, int* add) -{ - ptrace_faultinfo = proc_mm = 0; - return 0; -} - -static int __init nosysemu_cmd_param(char *str, int* add) -{ - force_sysemu_disabled = 1; - return 0; -} - -__uml_setup("skas0", skas0_cmd_param, - "skas0\n" - " Disables SKAS3 usage, so that SKAS0 is used, unless you \n" - " specify mode=tt.\n\n"); - -__uml_setup("nosysemu", nosysemu_cmd_param, - "nosysemu\n" - " Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" - " SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n" - " behaviour of ptrace() and helps reducing host context switch rate.\n" - " To make it working, you need a kernel patch for your host, too.\n" - " See http://perso.wanadoo.fr/laurent.vivier/UML/ for further information.\n\n"); - -static void __init check_sysemu(void) -{ - void *stack; - int pid, syscall, n, status, count=0; - - printk("Checking syscall emulation patch for ptrace..."); - sysemu_supported = 0; - pid = start_ptraced_child(&stack); - - if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) - goto fail; - - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if (n < 0) - panic("check_sysemu : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_sysemu : expected SIGTRAP, " - "got status = %d", status); - - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, - os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); - - if (stop_ptraced_child(pid, stack, 0, 0) < 0) - goto fail_stopped; - - sysemu_supported = 1; - printk("OK\n"); - set_using_sysemu(!force_sysemu_disabled); - - printk("Checking advanced syscall emulation patch for ptrace..."); - pid = start_ptraced_child(&stack); - while(1){ - count++; - if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) - goto fail; - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_ptrace : expected (SIGTRAP|SYSCALL_TRAP), " - "got status = %d", status); - - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, - 0); - if(syscall == __NR_getpid){ - if (!count) - panic("check_ptrace : SYSEMU_SINGLESTEP doesn't singlestep"); - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, - os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); - break; - } - } - if (stop_ptraced_child(pid, stack, 0, 0) < 0) - goto fail_stopped; - - sysemu_supported = 2; - printk("OK\n"); - - if ( !force_sysemu_disabled ) - set_using_sysemu(sysemu_supported); - return; - -fail: - stop_ptraced_child(pid, stack, 1, 0); -fail_stopped: - printk("missing\n"); -} - -void __init check_ptrace(void) -{ - void *stack; - int pid, syscall, n, status; - - printk("Checking that ptrace can change system call numbers..."); - pid = start_ptraced_child(&stack); - - if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) - panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno); - - while(1){ - if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", - errno); - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP + 0x80)) - panic("check_ptrace : expected SIGTRAP + 0x80, " - "got status = %d", status); - - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, - 0); - if(syscall == __NR_getpid){ - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, - __NR_getppid); - if(n < 0) - panic("check_ptrace : failed to modify system " - "call, errno = %d", errno); - break; - } - } - stop_ptraced_child(pid, stack, 0, 1); - printk("OK\n"); - check_sysemu(); -} - -int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) -{ - sigjmp_buf buf; - int n; - - *jmp_ptr = &buf; - n = sigsetjmp(buf, 1); - if(n != 0) - return(n); - (*fn)(arg); - return(0); -} - -void forward_pending_sigio(int target) -{ - sigset_t sigs; - - if(sigpending(&sigs)) - panic("forward_pending_sigio : sigpending failed"); - if(sigismember(&sigs, SIGIO)) - kill(target, SIGIO); -} - -extern void *__syscall_stub_start, __syscall_stub_end; - -#ifdef UML_CONFIG_MODE_SKAS - -static inline void check_skas3_ptrace_support(void) -{ - struct ptrace_faultinfo fi; - void *stack; - int pid, n; - - printf("Checking for the skas3 patch in the host..."); - pid = start_ptraced_child(&stack); - - n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); - if (n < 0) { - ptrace_faultinfo = 0; - if(errno == EIO) - printf("not found\n"); - else { - perror("not found"); - } - } - else { - if (!ptrace_faultinfo) - printf("found but disabled on command line\n"); - else - printf("found\n"); - } - - init_registers(pid); - stop_ptraced_child(pid, stack, 1, 1); -} - -int can_do_skas(void) -{ - printf("Checking for /proc/mm..."); - if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { - proc_mm = 0; - printf("not found\n"); - } else { - if (!proc_mm) - printf("found but disabled on command line\n"); - else - printf("found\n"); - } - - check_skas3_ptrace_support(); - return 1; -} -#else -int can_do_skas(void) -{ - return(0); -} -#endif diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index b781b6f..09f6f7c 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -333,6 +333,7 @@ int linux_main(int argc, char **argv) if(have_root == 0) add_arg(DEFAULT_COMMAND_LINE); + os_early_checks(); mode_tt = force_tt ? 1 : !can_do_skas(); #ifndef CONFIG_MODE_TT if (mode_tt) { @@ -470,7 +471,6 @@ void __init setup_arch(char **cmdline_p) void __init check_bugs(void) { arch_check_bugs(); - check_ptrace(); check_sigio(); check_devanon(); } diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 351d969..d3c1560 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -3,10 +3,11 @@ # Licensed under the GPL # -obj-y = aio.o elf_aux.o file.o process.o signal.o time.o tty.o user_syms.o \ - drivers/ sys-$(SUBARCH)/ +obj-y = aio.o elf_aux.o file.o process.o signal.o start_up.o time.o tt.o \ + tty.o user_syms.o drivers/ sys-$(SUBARCH)/ -USER_OBJS := aio.o elf_aux.o file.o process.o signal.o time.o tty.o +USER_OBJS := aio.o elf_aux.o file.o process.o signal.o start_up.o time.o tt.o \ + tty.o CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 1e126bf..d32413e 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -3,10 +3,10 @@ * Licensed under the GPL */ -#include #include #include #include +#include #include #include #include @@ -14,6 +14,10 @@ #include "os.h" #include "user.h" #include "user_util.h" +#include "signal_user.h" +#include "process.h" +#include "irq_user.h" +#include "kern_util.h" #define ARBITRARY_ADDR -1 #define FAILURE_PID -1 @@ -114,8 +118,10 @@ void os_usr1_process(int pid) kill(pid, SIGUSR1); } -/*Don't use the glibc version, which caches the result in TLS. It misses some - * syscalls, and also breaks with clone(), which does not unshare the TLS.*/ +/* Don't use the glibc version, which caches the result in TLS. It misses some + * syscalls, and also breaks with clone(), which does not unshare the TLS. + */ + inline _syscall0(pid_t, getpid) int os_getpid(void) @@ -164,6 +170,52 @@ int os_unmap_memory(void *addr, int len) return(0); } +void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) +{ + int flags = 0, pages; + + if(sig_stack != NULL){ + pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER); + set_sigstack(sig_stack, pages * page_size()); + flags = SA_ONSTACK; + } + if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); +} + +void init_new_thread_signals(int altstack) +{ + int flags = altstack ? SA_ONSTACK : 0; + + set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + set_handler(SIGILL, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + set_handler(SIGUSR2, (__sighandler_t) sig_handler, + flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + signal(SIGHUP, SIG_IGN); + + init_irq_signals(altstack); +} + +int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) +{ + sigjmp_buf buf; + int n; + + *jmp_ptr = &buf; + n = sigsetjmp(buf, 1); + if(n != 0) + return(n); + (*fn)(arg); + return(0); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c new file mode 100644 index 0000000..a3eab25 --- /dev/null +++ b/arch/um/os-Linux/start_up.c @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "signal_kern.h" +#include "signal_user.h" +#include "sysdep/ptrace.h" +#include "sysdep/sigcontext.h" +#include "irq_user.h" +#include "ptrace_user.h" +#include "time_user.h" +#include "init.h" +#include "os.h" +#include "uml-config.h" +#include "choose-mode.h" +#include "mode.h" +#include "tempfile.h" +#ifdef UML_CONFIG_MODE_SKAS +#include "skas.h" +#include "skas_ptrace.h" +#include "registers.h" +#endif + +static int ptrace_child(void *arg) +{ + int ret; + int pid = os_getpid(), ppid = getppid(); + int sc_result; + + if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ + perror("ptrace"); + os_kill_process(pid, 0); + } + os_stop_process(pid); + + /*This syscall will be intercepted by the parent. Don't call more than + * once, please.*/ + sc_result = os_getpid(); + + if (sc_result == pid) + ret = 1; /*Nothing modified by the parent, we are running + normally.*/ + else if (sc_result == ppid) + ret = 0; /*Expected in check_ptrace and check_sysemu when they + succeed in modifying the stack frame*/ + else + ret = 2; /*Serious trouble! This could be caused by a bug in + host 2.6 SKAS3/2.6 patch before release -V6, together + with a bug in the UML code itself.*/ + _exit(ret); +} + +static int start_ptraced_child(void **stack_out) +{ + void *stack; + unsigned long sp; + int pid, n, status; + + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(stack == MAP_FAILED) + panic("check_ptrace : mmap failed, errno = %d", errno); + sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); + pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); + if(pid < 0) + panic("start_ptraced_child : clone failed, errno = %d", errno); + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if(n < 0) + panic("check_ptrace : clone failed, errno = %d", errno); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) + panic("check_ptrace : expected SIGSTOP, got status = %d", + status); + + *stack_out = stack; + return(pid); +} + +/* When testing for SYSEMU support, if it is one of the broken versions, we + * must just avoid using sysemu, not panic, but only if SYSEMU features are + * broken. + * So only for SYSEMU features we test mustpanic, while normal host features + * must work anyway! + */ +static int stop_ptraced_child(int pid, void *stack, int exitcode, + int mustpanic) +{ + int status, n, ret = 0; + + if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) + panic("check_ptrace : ptrace failed, errno = %d", errno); + CATCH_EINTR(n = waitpid(pid, &status, 0)); + if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { + int exit_with = WEXITSTATUS(status); + if (exit_with == 2) + printk("check_ptrace : child exited with status 2. " + "Serious trouble happening! Try updating your " + "host skas patch!\nDisabling SYSEMU support."); + printk("check_ptrace : child exited with exitcode %d, while " + "expecting %d; status 0x%x", exit_with, + exitcode, status); + if (mustpanic) + panic("\n"); + else + printk("\n"); + ret = -1; + } + + if(munmap(stack, PAGE_SIZE) < 0) + panic("check_ptrace : munmap failed, errno = %d", errno); + return ret; +} + +int ptrace_faultinfo = 1; +int proc_mm = 1; + +static int __init skas0_cmd_param(char *str, int* add) +{ + ptrace_faultinfo = proc_mm = 0; + return 0; +} + +__uml_setup("skas0", skas0_cmd_param, + "skas0\n" + " Disables SKAS3 usage, so that SKAS0 is used, unless \n" + " you specify mode=tt.\n\n"); + +static int force_sysemu_disabled = 0; + +static int __init nosysemu_cmd_param(char *str, int* add) +{ + force_sysemu_disabled = 1; + return 0; +} + +__uml_setup("nosysemu", nosysemu_cmd_param, +"nosysemu\n" +" Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" +" SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n" +" behaviour of ptrace() and helps reducing host context switch rate.\n" +" To make it working, you need a kernel patch for your host, too.\n" +" See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n" +" information.\n\n"); + +static void __init check_sysemu(void) +{ + void *stack; + int pid, syscall, n, status, count=0; + + printk("Checking syscall emulation patch for ptrace..."); + sysemu_supported = 0; + pid = start_ptraced_child(&stack); + + if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) + goto fail; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + panic("check_sysemu : wait failed, errno = %d", errno); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + panic("check_sysemu : expected SIGTRAP, " + "got status = %d", status); + + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, + os_getpid()); + if(n < 0) + panic("check_sysemu : failed to modify system " + "call return, errno = %d", errno); + + if (stop_ptraced_child(pid, stack, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 1; + printk("OK\n"); + set_using_sysemu(!force_sysemu_disabled); + + printk("Checking advanced syscall emulation patch for ptrace..."); + pid = start_ptraced_child(&stack); + while(1){ + count++; + if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) + goto fail; + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if(n < 0) + panic("check_ptrace : wait failed, errno = %d", errno); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + panic("check_ptrace : expected (SIGTRAP|SYSCALL_TRAP), " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, + 0); + if(syscall == __NR_getpid){ + if (!count) + panic("check_ptrace : SYSEMU_SINGLESTEP doesn't singlestep"); + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, + os_getpid()); + if(n < 0) + panic("check_sysemu : failed to modify system " + "call return, errno = %d", errno); + break; + } + } + if (stop_ptraced_child(pid, stack, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 2; + printk("OK\n"); + + if ( !force_sysemu_disabled ) + set_using_sysemu(sysemu_supported); + return; + +fail: + stop_ptraced_child(pid, stack, 1, 0); +fail_stopped: + printk("missing\n"); +} + +static void __init check_ptrace(void) +{ + void *stack; + int pid, syscall, n, status; + + printk("Checking that ptrace can change system call numbers..."); + pid = start_ptraced_child(&stack); + + if(ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) + panic("check_ptrace: PTRACE_OLDSETOPTIONS failed, errno = %d", errno); + + while(1){ + if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + panic("check_ptrace : ptrace failed, errno = %d", + errno); + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if(n < 0) + panic("check_ptrace : wait failed, errno = %d", errno); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP + 0x80)) + panic("check_ptrace : expected SIGTRAP + 0x80, " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, + 0); + if(syscall == __NR_getpid){ + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, + __NR_getppid); + if(n < 0) + panic("check_ptrace : failed to modify system " + "call, errno = %d", errno); + break; + } + } + stop_ptraced_child(pid, stack, 0, 1); + printk("OK\n"); + check_sysemu(); +} + +void os_early_checks(void) +{ + check_ptrace(); +} + +#ifdef UML_CONFIG_MODE_SKAS +static inline void check_skas3_ptrace_support(void) +{ + struct ptrace_faultinfo fi; + void *stack; + int pid, n; + + printf("Checking for the skas3 patch in the host..."); + pid = start_ptraced_child(&stack); + + n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); + if (n < 0) { + ptrace_faultinfo = 0; + if(errno == EIO) + printf("not found\n"); + else + perror("not found"); + } + else { + if (!ptrace_faultinfo) + printf("found but disabled on command line\n"); + else + printf("found\n"); + } + + init_registers(pid); + stop_ptraced_child(pid, stack, 1, 1); +} + +int can_do_skas(void) +{ + printf("Checking for /proc/mm..."); + if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { + proc_mm = 0; + printf("not found\n"); + } + else { + if (!proc_mm) + printf("found but disabled on command line\n"); + else + printf("found\n"); + } + + check_skas3_ptrace_support(); + return 1; +} +#else +int can_do_skas(void) +{ + return(0); +} +#endif diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c new file mode 100644 index 0000000..5b047ab --- /dev/null +++ b/arch/um/os-Linux/tt.c @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "signal_kern.h" +#include "signal_user.h" +#include "sysdep/ptrace.h" +#include "sysdep/sigcontext.h" +#include "irq_user.h" +#include "ptrace_user.h" +#include "time_user.h" +#include "init.h" +#include "os.h" +#include "uml-config.h" +#include "choose-mode.h" +#include "mode.h" +#include "tempfile.h" + +/* + *------------------------- + * only for tt mode (will be deleted in future...) + *------------------------- + */ + +struct tramp { + int (*tramp)(void *); + void *tramp_data; + unsigned long temp_stack; + int flags; + int pid; +}; + +/* See above for why sigkill is here */ + +int sigkill = SIGKILL; + +int outer_tramp(void *arg) +{ + struct tramp *t; + int sig = sigkill; + + t = arg; + t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, + t->flags, t->tramp_data); + if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); + kill(os_getpid(), sig); + _exit(0); +} + +int start_fork_tramp(void *thread_arg, unsigned long temp_stack, + int clone_flags, int (*tramp)(void *)) +{ + struct tramp arg; + unsigned long sp; + int new_pid, status, err; + + /* The trampoline will run on the temporary stack */ + sp = stack_sp(temp_stack); + + clone_flags |= CLONE_FILES | SIGCHLD; + + arg.tramp = tramp; + arg.tramp_data = thread_arg; + arg.temp_stack = temp_stack; + arg.flags = clone_flags; + + /* Start the process and wait for it to kill itself */ + new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); + if(new_pid < 0) + return(new_pid); + + CATCH_EINTR(err = waitpid(new_pid, &status, 0)); + if(err < 0) + panic("Waiting for outer trampoline failed - errno = %d", + errno); + + if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) + panic("outer trampoline didn't exit with SIGKILL, " + "status = %d", status); + + return(arg.pid); +} + +void forward_pending_sigio(int target) +{ + sigset_t sigs; + + if(sigpending(&sigs)) + panic("forward_pending_sigio : sigpending failed"); + if(sigismember(&sigs, SIGIO)) + kill(target, SIGIO); +} + -- cgit v1.1 From 8b51304ed3184826fb262c1e9d3e58b0b00fd083 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Sat, 3 Sep 2005 15:57:49 -0700 Subject: [PATCH] uml: increase granularity of host capability checking This change enables SKAS0/SKAS3 to work with all combinations of /proc/mm and PTRACE_FAULTINFO being available or not. Also it changes the initialization of proc_mm and ptrace_faultinfo slightly, to ease forcing SKAS0 on a patched host. Forcing UML to run without /proc/mm or PTRACE_FAULTINFO by cmdline parameter can be implemented with a setup resetting the related variable. Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/include/skas.h | 2 +- arch/um/kernel/skas/mmu.c | 52 +++++++++++++++------------- arch/um/kernel/skas/process.c | 69 +++++++++++++++++++++++++++++++++----- arch/um/kernel/skas/process_kern.c | 7 +++- 4 files changed, 96 insertions(+), 34 deletions(-) (limited to 'arch/um') diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index e91064b..03c9d99 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -33,7 +33,7 @@ extern void *protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, int r, int w, int x, int done, void *data); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); -extern int new_mm(int from); +extern int new_mm(int from, unsigned long stack); extern int start_userspace(unsigned long stub_stack); extern int copy_context_skas0(unsigned long stack, int pid); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index d232daa..d837223 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -77,23 +77,14 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) struct mm_struct *cur_mm = current->mm; struct mm_id *cur_mm_id = &cur_mm->context.skas.id; struct mm_id *mm_id = &mm->context.skas.id; - unsigned long stack; - int from, ret; + unsigned long stack = 0; + int from, ret = -ENOMEM; - if(proc_mm){ - if((cur_mm != NULL) && (cur_mm != &init_mm)) - from = cur_mm->context.skas.id.u.mm_fd; - else from = -1; + if(!proc_mm || !ptrace_faultinfo){ + stack = get_zeroed_page(GFP_KERNEL); + if(stack == 0) + goto out; - ret = new_mm(from); - if(ret < 0){ - printk("init_new_context_skas - new_mm failed, " - "errno = %d\n", ret); - return ret; - } - mm_id->u.mm_fd = ret; - } - else { /* This zeros the entry that pgd_alloc didn't, needed since * we are about to reinitialize it, and want mm.nr_ptes to * be accurate. @@ -103,20 +94,30 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) ret = init_stub_pte(mm, CONFIG_STUB_CODE, (unsigned long) &__syscall_stub_start); if(ret) - goto out; - - ret = -ENOMEM; - stack = get_zeroed_page(GFP_KERNEL); - if(stack == 0) - goto out; - mm_id->stack = stack; + goto out_free; ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); if(ret) goto out_free; mm->nr_ptes--; + } + mm_id->stack = stack; + if(proc_mm){ + if((cur_mm != NULL) && (cur_mm != &init_mm)) + from = cur_mm_id->u.mm_fd; + else from = -1; + + ret = new_mm(from, stack); + if(ret < 0){ + printk("init_new_context_skas - new_mm failed, " + "errno = %d\n", ret); + goto out_free; + } + mm_id->u.mm_fd = ret; + } + else { if((cur_mm != NULL) && (cur_mm != &init_mm)) mm_id->u.pid = copy_context_skas0(stack, cur_mm_id->u.pid); @@ -126,7 +127,8 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) return 0; out_free: - free_page(mm_id->stack); + if(mm_id->stack != 0) + free_page(mm_id->stack); out: return ret; } @@ -137,8 +139,10 @@ void destroy_context_skas(struct mm_struct *mm) if(proc_mm) os_close_file(mmu->id.u.mm_fd); - else { + else os_kill_ptraced_process(mmu->id.u.pid, 1); + + if(!proc_mm || !ptrace_faultinfo){ free_page(mmu->id.stack); free_page(mmu->last_page_table); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index f228f8b..5cd0e99 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -138,6 +138,8 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu } extern int __syscall_stub_start; +int stub_code_fd = -1; +__u64 stub_code_offset; static int userspace_tramp(void *stack) { @@ -152,31 +154,31 @@ static int userspace_tramp(void *stack) /* This has a pte, but it can't be mapped in with the usual * tlb_flush mechanism because this is part of that mechanism */ - int fd; - __u64 offset; - - fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), - PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, + stub_code_fd, stub_code_offset); if(addr == MAP_FAILED){ - printk("mapping mmap stub failed, errno = %d\n", + printk("mapping stub code failed, errno = %d\n", errno); exit(1); } if(stack != NULL){ + int fd; + __u64 offset; + fd = phys_mapping(to_phys(stack), &offset); addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, offset); if(addr == MAP_FAILED){ - printk("mapping segfault stack failed, " + printk("mapping stub stack failed, " "errno = %d\n", errno); exit(1); } } } - if(!ptrace_faultinfo && (stack != NULL)){ + if(!ptrace_faultinfo){ unsigned long v = UML_CONFIG_STUB_CODE + (unsigned long) stub_segv_handler - (unsigned long) &__syscall_stub_start; @@ -202,6 +204,10 @@ int start_userspace(unsigned long stub_stack) unsigned long sp; int pid, status, n, flags; + if ( stub_code_fd == -1 ) + stub_code_fd = phys_mapping(to_phys(&__syscall_stub_start), + &stub_code_offset); + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if(stack == MAP_FAILED) @@ -363,6 +369,53 @@ int copy_context_skas0(unsigned long new_stack, int pid) return pid; } +/* + * This is used only, if proc_mm is available, while PTRACE_FAULTINFO + * isn't. Opening /proc/mm creates a new mm_context, which lacks the stub-pages + * Thus, we map them using /proc/mm-fd + */ +void map_stub_pages(int fd, unsigned long code, + unsigned long data, unsigned long stack) +{ + struct proc_mm_op mmop; + int n; + + mmop = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = code, + .len = PAGE_SIZE, + .prot = PROT_EXEC, + .flags = MAP_FIXED | MAP_PRIVATE, + .fd = stub_code_fd, + .offset = stub_code_offset + } } }); + n = os_write_file(fd, &mmop, sizeof(mmop)); + if(n != sizeof(mmop)) + panic("map_stub_pages : /proc/mm map for code failed, " + "err = %d\n", -n); + + if ( stack ) { + __u64 map_offset; + int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); + mmop = ((struct proc_mm_op) + { .op = MM_MMAP, + .u = + { .mmap = + { .addr = data, + .len = PAGE_SIZE, + .prot = PROT_READ | PROT_WRITE, + .flags = MAP_FIXED | MAP_SHARED, + .fd = map_fd, + .offset = map_offset + } } }); + n = os_write_file(fd, &mmop, sizeof(mmop)); + if(n != sizeof(mmop)) + panic("map_stub_pages : /proc/mm map for data failed, " + "err = %d\n", -n); + } +} + void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) { diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index cbabab1..3d1b227 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -129,7 +129,9 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, return(0); } -int new_mm(int from) +extern void map_stub_pages(int fd, unsigned long code, + unsigned long data, unsigned long stack); +int new_mm(int from, unsigned long stack) { struct proc_mm_op copy; int n, fd; @@ -148,6 +150,9 @@ int new_mm(int from) "err = %d\n", -n); } + if(!ptrace_faultinfo) + map_stub_pages(fd, CONFIG_STUB_CODE, CONFIG_STUB_DATA, stack); + return(fd); } -- cgit v1.1 From 07bf731e4b95d7c9ea9dbacd1fc4a041120dfffb Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Sat, 3 Sep 2005 15:57:50 -0700 Subject: [PATCH] uml: skas0 stubs now check system call return values Change syscall-stub's data to include a "expected retval". Stub now checks syscalls retval and aborts execution of syscall list, if retval != expected retval. run_syscall_stub prints the data of the failed syscall, using the data pointer and retval written by the stub to the beginning of the stack. one_syscall_stub is removed, to simplify code, because only some instructions are saved by one_syscall_stub, no host-syscall. Using the stub with additional data (modify_ldt via stub) is prepared also. Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/tlb.h | 6 +- arch/um/kernel/skas/include/skas.h | 33 +++--- arch/um/kernel/skas/mem_user.c | 234 ++++++++++++++++++++++--------------- arch/um/kernel/skas/tlb.c | 31 ++--- arch/um/kernel/tlb.c | 163 ++++++++++++++------------ arch/um/kernel/tt/tlb.c | 27 +++-- arch/um/sys-i386/stub.S | 42 +++++-- arch/um/sys-x86_64/stub.S | 50 ++++++-- 8 files changed, 348 insertions(+), 238 deletions(-) (limited to 'arch/um') diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h index 2deefb9..45d7da6 100644 --- a/arch/um/include/tlb.h +++ b/arch/um/include/tlb.h @@ -38,9 +38,9 @@ extern void mprotect_kernel_vm(int w); extern void force_flush_all(void); extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, - void *(*do_ops)(union mm_context *, - struct host_vm_op *, int, int, - void *)); + int (*do_ops)(union mm_context *, + struct host_vm_op *, int, int, + void **)); extern int flush_tlb_kernel_range_common(unsigned long start, unsigned long end); diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index 03c9d99..0609347 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -24,14 +24,14 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern void *map(struct mm_id * mm_idp, unsigned long virt, - unsigned long len, int r, int w, int x, int phys_fd, - unsigned long long offset, int done, void *data); -extern void *unmap(struct mm_id * mm_idp, void *addr, - unsigned long len, int done, void *data); -extern void *protect(struct mm_id * mm_idp, unsigned long addr, - unsigned long len, int r, int w, int x, int done, - void *data); +extern int map(struct mm_id * mm_idp, unsigned long virt, + unsigned long len, int r, int w, int x, int phys_fd, + unsigned long long offset, int done, void **data); +extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, + int done, void **data); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x, int done, + void **data); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from, unsigned long stack); extern int start_userspace(unsigned long stub_stack); @@ -39,16 +39,11 @@ extern int copy_context_skas0(unsigned long stack, int pid); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); +extern long run_syscall_stub(struct mm_id * mm_idp, + int syscall, unsigned long *args, long expected, + void **addr, int done); +extern long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index c976320..1d89640 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -5,13 +5,14 @@ #include #include +#include #include #include #include #include #include "mem_user.h" #include "mem.h" -#include "mm_id.h" +#include "skas.h" #include "user.h" #include "os.h" #include "proc_mm.h" @@ -23,56 +24,99 @@ #include "uml-config.h" #include "sysdep/ptrace.h" #include "sysdep/stub.h" -#include "skas.h" -extern unsigned long syscall_stub, batch_syscall_stub, __syscall_stub_start; +extern unsigned long batch_syscall_stub, __syscall_stub_start; extern void wait_stub_done(int pid, int sig, char * fname); +static inline unsigned long *check_init_stack(struct mm_id * mm_idp, + unsigned long *stack) +{ + if(stack == NULL){ + stack = (unsigned long *) mm_idp->stack + 2; + *stack = 0; + } + return stack; +} + +extern int proc_mm; + int single_count = 0; +int multi_count = 0; +int multi_op_count = 0; -static long one_syscall_stub(struct mm_id * mm_idp, int syscall, - unsigned long *args) +static long do_syscall_stub(struct mm_id *mm_idp, void **addr) { + unsigned long regs[MAX_REG_NR]; + unsigned long *data; + unsigned long *syscall; + long ret, offset; int n, pid = mm_idp->u.pid; - unsigned long regs[MAX_REG_NR]; + + if(proc_mm) +#warning Need to look up userspace_pid by cpu + pid = userspace_pid[0]; + + multi_count++; get_safe_registers(regs); regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + - ((unsigned long) &syscall_stub - + ((unsigned long) &batch_syscall_stub - (unsigned long) &__syscall_stub_start); - /* XXX Don't have a define for starting a syscall */ - regs[REGS_SYSCALL_NR] = syscall; - regs[REGS_SYSCALL_ARG1] = args[0]; - regs[REGS_SYSCALL_ARG2] = args[1]; - regs[REGS_SYSCALL_ARG3] = args[2]; - regs[REGS_SYSCALL_ARG4] = args[3]; - regs[REGS_SYSCALL_ARG5] = args[4]; - regs[REGS_SYSCALL_ARG6] = args[5]; - n = ptrace_setregs(pid, regs); - if(n < 0){ - printk("one_syscall_stub : PTRACE_SETREGS failed, " - "errno = %d\n", n); - return(n); + n = ptrace_setregs(pid, regs); + if(n < 0) + panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n", + n); + + wait_stub_done(pid, 0, "do_syscall_stub"); + + /* When the stub stops, we find the following values on the + * beginning of the stack: + * (long )return_value + * (long )offset to failed sycall-data (0, if no error) + */ + ret = *((unsigned long *) mm_idp->stack); + offset = *((unsigned long *) mm_idp->stack + 1); + if (offset) { + data = (unsigned long *)(mm_idp->stack + + offset - UML_CONFIG_STUB_DATA); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk("do_syscall_stub: syscall %ld failed, return value = " + "0x%lx, expected return value = 0x%lx\n", + syscall[0], ret, syscall[7]); + printk(" syscall parameters: " + "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for(n = 1; n < data[0]/sizeof(long); n++) { + if(n == 1) + printk(" additional syscall data:"); + if(n % 4 == 1) + printk("\n "); + printk(" 0x%lx", data[n]); + } + if(n > 1) + printk("\n"); } + else ret = 0; - wait_stub_done(pid, 0, "one_syscall_stub"); + *addr = check_init_stack(mm_idp, NULL); - return(*((unsigned long *) mm_idp->stack)); + return ret; } -int multi_count = 0; -int multi_op_count = 0; - -static long many_syscall_stub(struct mm_id * mm_idp, int syscall, - unsigned long *args, int done, void **addr_out) +long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, long expected, void **addr, + int done) { - unsigned long regs[MAX_REG_NR], *stack; - int n, pid = mm_idp->u.pid; + unsigned long *stack = check_init_stack(mm_idp, *addr); + + if(done && *addr == NULL) + single_count++; + + *stack += sizeof(long); + stack += *stack / sizeof(long); - stack = *addr_out; - if(stack == NULL) - stack = (unsigned long *) current_stub_stack(); *stack++ = syscall; *stack++ = args[0]; *stack++ = args[1]; @@ -80,53 +124,55 @@ static long many_syscall_stub(struct mm_id * mm_idp, int syscall, *stack++ = args[3]; *stack++ = args[4]; *stack++ = args[5]; + *stack++ = expected; *stack = 0; multi_op_count++; if(!done && ((((unsigned long) stack) & ~PAGE_MASK) < - PAGE_SIZE - 8 * sizeof(long))){ - *addr_out = stack; + PAGE_SIZE - 10 * sizeof(long))){ + *addr = stack; return 0; } - multi_count++; - get_safe_registers(regs); - regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + - ((unsigned long) &batch_syscall_stub - - (unsigned long) &__syscall_stub_start); - regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA; + return do_syscall_stub(mm_idp, addr); +} - n = ptrace_setregs(pid, regs); - if(n < 0){ - printk("many_syscall_stub : PTRACE_SETREGS failed, " - "errno = %d\n", n); - return(n); - } +long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr) +{ + unsigned long *stack; + int ret = 0; + + /* If *addr still is uninitialized, it *must* contain NULL. + * Thus in this case do_syscall_stub correctly won't be called. + */ + if((((unsigned long) *addr) & ~PAGE_MASK) >= + PAGE_SIZE - (10 + data_count) * sizeof(long)) { + ret = do_syscall_stub(mm_idp, addr); + /* in case of error, don't overwrite data on stack */ + if(ret) + return ret; + } - wait_stub_done(pid, 0, "many_syscall_stub"); - stack = (unsigned long *) mm_idp->stack; + stack = check_init_stack(mm_idp, *addr); + *addr = stack; - *addr_out = stack; - return(*stack); -} + *stack = data_count * sizeof(long); -static long run_syscall_stub(struct mm_id * mm_idp, int syscall, - unsigned long *args, void **addr, int done) -{ - long res; + memcpy(stack + 1, data, data_count * sizeof(long)); - if((*addr == NULL) && done) - res = one_syscall_stub(mm_idp, syscall, args); - else res = many_syscall_stub(mm_idp, syscall, args, done, addr); + *stub_addr = (void *)(((unsigned long)(stack + 1) & ~PAGE_MASK) + + UML_CONFIG_STUB_DATA); - return res; + return 0; } -void *map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, - int r, int w, int x, int phys_fd, unsigned long long offset, - int done, void *data) +int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset, + int done, void **data) { - int prot, n; + int prot, ret; prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); @@ -146,29 +192,27 @@ void *map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, .fd = phys_fd, .offset= offset } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) - printk("map : /proc/mm map failed, err = %d\n", -n); + ret = os_write_file(fd, &map, sizeof(map)); + if(ret != sizeof(map)) + printk("map : /proc/mm map failed, err = %d\n", -ret); + else ret = 0; } else { - long res; unsigned long args[] = { virt, len, prot, MAP_SHARED | MAP_FIXED, phys_fd, MMAP_OFFSET(offset) }; - res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, - &data, done); - if((void *) res == MAP_FAILED) - printk("mmap stub failed, errno = %d\n", res); + ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, + data, done); } - return data; + return ret; } -void *unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done, - void *data) +int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done, + void **data) { - int n; + int ret; if(proc_mm){ struct proc_mm_op unmap; @@ -180,29 +224,29 @@ void *unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done, { .addr = (unsigned long) addr, .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) - printk("unmap - proc_mm write returned %d\n", n); + ret = os_write_file(fd, &unmap, sizeof(unmap)); + if(ret != sizeof(unmap)) + printk("unmap - proc_mm write returned %d\n", ret); + else ret = 0; } else { - int res; unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_munmap, args, - &data, done); - if(res < 0) - printk("munmap stub failed, errno = %d\n", res); + ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, + data, done); + if(ret < 0) + printk("munmap stub failed, errno = %d\n", ret); } - return data; + return ret; } -void *protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, - int r, int w, int x, int done, void *data) +int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x, int done, void **data) { struct proc_mm_op protect; - int prot, n; + int prot, ret; prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); @@ -217,21 +261,19 @@ void *protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, .len = len, .prot = prot } } } ); - n = os_write_file(fd, &protect, sizeof(protect)); - if(n != sizeof(protect)) - panic("protect failed, err = %d", -n); + ret = os_write_file(fd, &protect, sizeof(protect)); + if(ret != sizeof(protect)) + printk("protect failed, err = %d", -ret); + else ret = 0; } else { - int res; unsigned long args[] = { addr, len, prot, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_mprotect, args, - &data, done); - if(res < 0) - panic("mprotect stub failed, errno = %d\n", res); + ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, + data, done); } - return data; + return ret; } void before_mem_skas(unsigned long unused) diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index 4b5fd20..6e84963 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -18,30 +18,31 @@ #include "os.h" #include "tlb.h" -static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, - int finished, void *flush) +static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void **flush) { struct host_vm_op *op; - int i; + int i, ret = 0; - for(i = 0; i <= last; i++){ + for(i = 0; i <= last && !ret; i++){ op = &ops[i]; switch(op->type){ case MMAP: - flush = map(&mmu->skas.id, op->u.mmap.addr, - op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, - op->u.mmap.x, op->u.mmap.fd, - op->u.mmap.offset, finished, flush); + ret = map(&mmu->skas.id, op->u.mmap.addr, + op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, + op->u.mmap.x, op->u.mmap.fd, + op->u.mmap.offset, finished, flush); break; case MUNMAP: - flush = unmap(&mmu->skas.id, (void *) op->u.munmap.addr, - op->u.munmap.len, finished, flush); + ret = unmap(&mmu->skas.id, + (void *) op->u.munmap.addr, + op->u.munmap.len, finished, flush); break; case MPROTECT: - flush = protect(&mmu->skas.id, op->u.mprotect.addr, - op->u.mprotect.len, op->u.mprotect.r, - op->u.mprotect.w, op->u.mprotect.x, - finished, flush); + ret = protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x, + finished, flush); break; default: printk("Unknown op type %d in do_ops\n", op->type); @@ -49,7 +50,7 @@ static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, } } - return flush; + return ret; } extern int proc_mm; diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 7d914bb..80ed618 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -16,115 +16,117 @@ #include "os.h" static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, + int r, int w, int x, struct host_vm_op *ops, int *index, int last_filled, union mm_context *mmu, void **flush, - void *(*do_ops)(union mm_context *, struct host_vm_op *, - int, int, void *)) + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) { __u64 offset; struct host_vm_op *last; - int fd; + int fd, ret = 0; fd = phys_mapping(phys, &offset); - if(index != -1){ - last = &ops[index]; + if(*index != -1){ + last = &ops[*index]; if((last->type == MMAP) && (last->u.mmap.addr + last->u.mmap.len == virt) && (last->u.mmap.r == r) && (last->u.mmap.w == w) && (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && (last->u.mmap.offset + last->u.mmap.len == offset)){ last->u.mmap.len += len; - return index; + return 0; } } - if(index == last_filled){ - *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); - index = -1; + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; } - ops[++index] = ((struct host_vm_op) { .type = MMAP, - .u = { .mmap = { - .addr = virt, - .len = len, - .r = r, - .w = w, - .x = x, - .fd = fd, - .offset = offset } - } }); - return index; + ops[++*index] = ((struct host_vm_op) { .type = MMAP, + .u = { .mmap = { + .addr = virt, + .len = len, + .r = r, + .w = w, + .x = x, + .fd = fd, + .offset = offset } + } }); + return ret; } static int add_munmap(unsigned long addr, unsigned long len, - struct host_vm_op *ops, int index, int last_filled, + struct host_vm_op *ops, int *index, int last_filled, union mm_context *mmu, void **flush, - void *(*do_ops)(union mm_context *, struct host_vm_op *, - int, int, void *)) + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) { struct host_vm_op *last; + int ret = 0; - if(index != -1){ - last = &ops[index]; + if(*index != -1){ + last = &ops[*index]; if((last->type == MUNMAP) && (last->u.munmap.addr + last->u.mmap.len == addr)){ last->u.munmap.len += len; - return index; + return 0; } } - if(index == last_filled){ - *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); - index = -1; + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; } - ops[++index] = ((struct host_vm_op) { .type = MUNMAP, - .u = { .munmap = { - .addr = addr, - .len = len } } }); - return index; + ops[++*index] = ((struct host_vm_op) { .type = MUNMAP, + .u = { .munmap = { + .addr = addr, + .len = len } } }); + return ret; } static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, - int x, struct host_vm_op *ops, int index, + int x, struct host_vm_op *ops, int *index, int last_filled, union mm_context *mmu, void **flush, - void *(*do_ops)(union mm_context *, - struct host_vm_op *, int, int, void *)) + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) { struct host_vm_op *last; + int ret = 0; - if(index != -1){ - last = &ops[index]; + if(*index != -1){ + last = &ops[*index]; if((last->type == MPROTECT) && (last->u.mprotect.addr + last->u.mprotect.len == addr) && (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && (last->u.mprotect.x == x)){ last->u.mprotect.len += len; - return index; + return 0; } } - if(index == last_filled){ - *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); - index = -1; + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; } - ops[++index] = ((struct host_vm_op) { .type = MPROTECT, - .u = { .mprotect = { - .addr = addr, - .len = len, - .r = r, - .w = w, - .x = x } } }); - return index; + ops[++*index] = ((struct host_vm_op) { .type = MPROTECT, + .u = { .mprotect = { + .addr = addr, + .len = len, + .r = r, + .w = w, + .x = x } } }); + return ret; } #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, - void *(*do_ops)(union mm_context *, struct host_vm_op *, - int, int, void *)) + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) { pgd_t *npgd; pud_t *npud; @@ -136,20 +138,21 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, struct host_vm_op ops[1]; void *flush = NULL; int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; + int ret = 0; if(mm == NULL) return; ops[0].type = NONE; - for(addr = start_addr; addr < end_addr;){ + for(addr = start_addr; addr < end_addr && !ret;){ npgd = pgd_offset(mm, addr); if(!pgd_present(*npgd)){ end = ADD_ROUND(addr, PGDIR_SIZE); if(end > end_addr) end = end_addr; if(force || pgd_newpage(*npgd)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, mmu, - &flush, do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pgd_mkuptodate(*npgd); } addr = end; @@ -162,9 +165,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(end > end_addr) end = end_addr; if(force || pud_newpage(*npud)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, mmu, - &flush, do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pud_mkuptodate(*npud); } addr = end; @@ -177,9 +180,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(end > end_addr) end = end_addr; if(force || pmd_newpage(*npmd)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, mmu, - &flush, do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pmd_mkuptodate(*npmd); } addr = end; @@ -198,24 +201,32 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, } if(force || pte_newpage(*npte)){ if(pte_present(*npte)) - op_index = add_mmap(addr, - pte_val(*npte) & PAGE_MASK, - PAGE_SIZE, r, w, x, ops, - op_index, last_op, mmu, - &flush, do_ops); - else op_index = add_munmap(addr, PAGE_SIZE, ops, - op_index, last_op, mmu, - &flush, do_ops); + ret = add_mmap(addr, + pte_val(*npte) & PAGE_MASK, + PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); + else ret = add_munmap(addr, PAGE_SIZE, ops, + &op_index, last_op, mmu, + &flush, do_ops); } else if(pte_newprot(*npte)) - op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, - op_index, last_op, mmu, - &flush, do_ops); + ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - flush = (*do_ops)(mmu, ops, op_index, 1, flush); + + if(!ret) + ret = (*do_ops)(mmu, ops, op_index, 1, &flush); + + /* This is not an else because ret is modified above */ + if(ret) { + printk("fix_range_common: failed, killing current process\n"); + force_sig(SIGKILL, current); + } } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 16fc6a2..f1d85db 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,26 +17,31 @@ #include "os.h" #include "tlb.h" -static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, - int finished, void *flush) +static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void **flush) { struct host_vm_op *op; - int i; + int i, ret=0; - for(i = 0; i <= last; i++){ + for(i = 0; i <= last && !ret; i++){ op = &ops[i]; switch(op->type){ case MMAP: - os_map_memory((void *) op->u.mmap.addr, op->u.mmap.fd, - op->u.mmap.offset, op->u.mmap.len, - op->u.mmap.r, op->u.mmap.w, - op->u.mmap.x); + ret = os_map_memory((void *) op->u.mmap.addr, + op->u.mmap.fd, op->u.mmap.offset, + op->u.mmap.len, op->u.mmap.r, + op->u.mmap.w, op->u.mmap.x); break; case MUNMAP: - os_unmap_memory((void *) op->u.munmap.addr, - op->u.munmap.len); + ret = os_unmap_memory((void *) op->u.munmap.addr, + op->u.munmap.len); break; case MPROTECT: + ret = protect_memory(op->u.mprotect.addr, + op->u.munmap.len, + op->u.mprotect.r, + op->u.mprotect.w, + op->u.mprotect.x, 1); protect_memory(op->u.mprotect.addr, op->u.munmap.len, op->u.mprotect.r, op->u.mprotect.w, op->u.mprotect.x, 1); @@ -47,7 +52,7 @@ static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, } } - return NULL; + return ret; } static void fix_range(struct mm_struct *mm, unsigned long start_addr, diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S index a0f9506..6a70d9a 100644 --- a/arch/um/sys-i386/stub.S +++ b/arch/um/sys-i386/stub.S @@ -2,24 +2,50 @@ .globl syscall_stub .section .__syscall_stub, "x" -syscall_stub: - int $0x80 - mov %eax, UML_CONFIG_STUB_DATA - int3 .globl batch_syscall_stub batch_syscall_stub: - mov $UML_CONFIG_STUB_DATA, %esp -again: pop %eax + /* load pointer to first operation */ + mov $(UML_CONFIG_STUB_DATA+8), %esp + +again: + /* load length of additional data */ + mov 0x0(%esp), %eax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %eax, UML_CONFIG_STUB_DATA+4 cmpl $0, %eax jz done + + /* save current pointer */ + mov %esp, UML_CONFIG_STUB_DATA+4 + + /* skip additional data */ + add %eax, %esp + + /* load syscall-# */ + pop %eax + + /* load syscall params */ pop %ebx pop %ecx pop %edx pop %esi pop %edi pop %ebp + + /* execute syscall */ int $0x80 + + /* check return value */ + pop %ebx + cmp %ebx, %eax + je again + +done: + /* save return value */ mov %eax, UML_CONFIG_STUB_DATA - jmp again -done: int3 + + /* stop */ + int3 diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S index 957f2ef..03c2797 100644 --- a/arch/um/sys-x86_64/stub.S +++ b/arch/um/sys-x86_64/stub.S @@ -16,21 +16,51 @@ syscall_stub: .globl batch_syscall_stub batch_syscall_stub: - movq $(UML_CONFIG_STUB_DATA >> 32), %rbx - salq $32, %rbx - movq $(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx - or %rcx, %rbx - movq %rbx, %rsp -again: pop %rax - cmpq $0, %rax -jz done + mov $(UML_CONFIG_STUB_DATA >> 32), %rbx + sal $32, %rbx + mov $(UML_CONFIG_STUB_DATA & 0xffffffff), %rax + or %rax, %rbx + /* load pointer to first operation */ + mov %rbx, %rsp + add $0x10, %rsp +again: + /* load length of additional data */ + mov 0x0(%rsp), %rax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %rax, 8(%rbx) + cmp $0, %rax + jz done + + /* save current pointer */ + mov %rsp, 8(%rbx) + + /* skip additional data */ + add %rax, %rsp + + /* load syscall-# */ + pop %rax + + /* load syscall params */ pop %rdi pop %rsi pop %rdx pop %r10 pop %r8 pop %r9 + + /* execute syscall */ syscall + + /* check return value */ + pop %rcx + cmp %rcx, %rax + je again + +done: + /* save return value */ mov %rax, (%rbx) - jmp again -done: int3 + + /* stop */ + int3 -- cgit v1.1 From d9838d86536fe17e76d19bf3e737100fae618396 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Sat, 3 Sep 2005 15:57:51 -0700 Subject: [PATCH] uml: allow host capability usage to be disabled Add new cmdline setups: - noprocmm - noptracefaultinfo In case of testing, they can be used to switch off usage of /proc/mm and PTRACE_FAULTINFO independently. Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/os-Linux/start_up.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/um') diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index a3eab25..a8b5b9d 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -275,6 +275,30 @@ void os_early_checks(void) check_ptrace(); } +static int __init noprocmm_cmd_param(char *str, int* add) +{ + proc_mm = 0; + return 0; +} + +__uml_setup("noprocmm", noprocmm_cmd_param, +"noprocmm\n" +" Turns off usage of /proc/mm, even if host supports it.\n" +" To support /proc/mm, the host needs to be patched using\n" +" the current skas3 patch.\n\n"); + +static int __init noptracefaultinfo_cmd_param(char *str, int* add) +{ + ptrace_faultinfo = 0; + return 0; +} + +__uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param, +"noptracefaultinfo\n" +" Turns off usage of PTRACE_FAULTINFO, even if host supports\n" +" it. To support PTRACE_FAULTINFO, the host needs to be patched\n" +" using the current skas3 patch.\n\n"); + #ifdef UML_CONFIG_MODE_SKAS static inline void check_skas3_ptrace_support(void) { -- cgit v1.1 From f9dfefe423a7633d81310c7b06c5566c74f9167b Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Sat, 3 Sep 2005 15:57:51 -0700 Subject: [PATCH] uml: fix advanced sysemu check cleanup and fix the check for advanced sysemu (PTRACE_SYSEMU_SINGLESTEP option) Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/os-Linux/start_up.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'arch/um') diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index a8b5b9d..040cc14 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -161,7 +161,7 @@ __uml_setup("nosysemu", nosysemu_cmd_param, static void __init check_sysemu(void) { void *stack; - int pid, syscall, n, status, count=0; + int pid, n, status, count=0; printk("Checking syscall emulation patch for ptrace..."); sysemu_supported = 0; @@ -192,6 +192,12 @@ static void __init check_sysemu(void) printk("Checking advanced syscall emulation patch for ptrace..."); pid = start_ptraced_child(&stack); + + if(ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0) + panic("check_ptrace: PTRACE_OLDSETOPTIONS failed, errno = %d", + errno); + while(1){ count++; if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) @@ -199,15 +205,10 @@ static void __init check_sysemu(void) CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); if(n < 0) panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_ptrace : expected (SIGTRAP|SYSCALL_TRAP), " - "got status = %d", status); - - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, - 0); - if(syscall == __NR_getpid){ + if(WIFSTOPPED(status) && (WSTOPSIG(status) == (SIGTRAP|0x80))){ if (!count) - panic("check_ptrace : SYSEMU_SINGLESTEP doesn't singlestep"); + panic("check_ptrace : SYSEMU_SINGLESTEP " + "doesn't singlestep"); n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, os_getpid()); if(n < 0) @@ -215,6 +216,11 @@ static void __init check_sysemu(void) "call return, errno = %d", errno); break; } + else if(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) + count++; + else + panic("check_ptrace : expected SIGTRAP or " + "(SIGTRAP|0x80), got status = %d", status); } if (stop_ptraced_child(pid, stack, 0, 0) < 0) goto fail_stopped; @@ -250,8 +256,8 @@ static void __init check_ptrace(void) CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); if(n < 0) panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP + 0x80)) - panic("check_ptrace : expected SIGTRAP + 0x80, " + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != (SIGTRAP|0x80))) + panic("check_ptrace : expected (SIGTRAP|0x80), " "got status = %d", status); syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, -- cgit v1.1 From 7ef939054139ef857cebbec07cbd12d7cf7beedd Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:52 -0700 Subject: [PATCH] uml: fix x86_64 page leak We were leaking pmd pages when 3_LEVEL_PGTABLES was enabled. This fixes that. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/include/mmu-skas.h | 4 ++++ arch/um/kernel/skas/mmu.c | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/um') diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h index 278b72f..09536f8 100644 --- a/arch/um/kernel/skas/include/mmu-skas.h +++ b/arch/um/kernel/skas/include/mmu-skas.h @@ -6,11 +6,15 @@ #ifndef __SKAS_MMU_H #define __SKAS_MMU_H +#include "linux/config.h" #include "mm_id.h" struct mmu_context_skas { struct mm_id id; unsigned long last_page_table; +#ifdef CONFIG_3_LEVEL_PGTABLES + unsigned long last_pmd; +#endif }; extern void switch_mm_skas(struct mm_id * mm_idp); diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index d837223..240143b 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -56,6 +56,9 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, */ mm->context.skas.last_page_table = pmd_page_kernel(*pmd); +#ifdef CONFIG_3_LEVEL_PGTABLES + mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud)); +#endif *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); *pte = pte_mkexec(*pte); @@ -144,6 +147,10 @@ void destroy_context_skas(struct mm_struct *mm) if(!proc_mm || !ptrace_faultinfo){ free_page(mmu->id.stack); - free_page(mmu->last_page_table); + pte_free_kernel((pte_t *) mmu->last_page_table); + dec_page_state(nr_page_table_pages); +#ifdef CONFIG_3_LEVEL_PGTABLES + pmd_free((pmd_t *) mmu->last_pmd); +#endif } } -- cgit v1.1 From 97de50c0add1e8f3b4e764c66a13c07235fee631 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 6 Sep 2005 15:17:49 -0700 Subject: [PATCH] remove verify_area(): remove verify_area() from various uaccess.h headers Remove the deprecated (and unused) verify_area() from various uaccess.h headers. Signed-off-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/um_uaccess.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/um') diff --git a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h index 6e348cb..84c0868 100644 --- a/arch/um/include/um_uaccess.h +++ b/arch/um/include/um_uaccess.h @@ -20,13 +20,6 @@ #define access_ok(type, addr, size) \ CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size) -/* this function will go away soon - use access_ok() instead */ -static inline int __deprecated verify_area(int type, const void __user *addr, unsigned long size) -{ - return (CHOOSE_MODE_PROC(verify_area_tt, verify_area_skas, type, addr, - size)); -} - static inline int copy_from_user(void *to, const void __user *from, int n) { return(CHOOSE_MODE_PROC(copy_from_user_tt, copy_from_user_skas, to, -- cgit v1.1 From 90dffc03ca8caa0ea047f11c81fe61cd86d80568 Mon Sep 17 00:00:00 2001 From: "viro@ZenIV.linux.org.uk" Date: Tue, 6 Sep 2005 02:02:22 +0100 Subject: [PATCH] lost chunk of "uml: build cleanups" A piece of the UML stubs patch got lost - it has Killed STUBS_CFLAGS - it's not needed and the only remaining use had been gratitious - it only polluted CFLAGS in description and does remove it in arch/um/Makefile-x86_64, but forgets to do the same in i386 counterpart. Lost chunk follows: Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/um/Makefile-i386 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/um') diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 index a777e57..1ab431a 100644 --- a/arch/um/Makefile-i386 +++ b/arch/um/Makefile-i386 @@ -27,7 +27,7 @@ export LDFLAGS HOSTCFLAGS HOSTLDFLAGS UML_OBJCOPYFLAGS endif endif -CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) $(STUB_CFLAGS) +CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) ifneq ($(CONFIG_GPROF),y) ARCH_CFLAGS += -DUM_FASTCALL -- cgit v1.1 From 95608261dae863bc43292e6fbd946a3abd3aa49f Mon Sep 17 00:00:00 2001 From: "viro@ZenIV.linux.org.uk" Date: Tue, 6 Sep 2005 22:33:51 +0100 Subject: [PATCH] bogus symbol used in arch/um/os-Linux/elf_aux.c elf_aux is userland code; it uses symbol (ELF_CLASS) that doesn't exist in userland headers; pulled into kernel-offsets.h, switched elf_aux to using it. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/um/include/common-offsets.h | 1 + arch/um/os-Linux/Makefile | 3 +++ arch/um/os-Linux/elf_aux.c | 3 ++- arch/um/sys-i386/kernel-offsets.c | 1 + arch/um/sys-x86_64/kernel-offsets.c | 1 + 5 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/um') diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h index d705daa..0aa6209 100644 --- a/arch/um/include/common-offsets.h +++ b/arch/um/include/common-offsets.h @@ -12,3 +12,4 @@ DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); DEFINE_STR(UM_KERN_INFO, KERN_INFO); DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); +DEFINE(HOST_ELF_CLASS, ELF_CLASS); diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index d3c1560..7a16624 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -9,6 +9,9 @@ obj-y = aio.o elf_aux.o file.o process.o signal.o start_up.o time.o tt.o \ USER_OBJS := aio.o elf_aux.o file.o process.o signal.o start_up.o time.o tt.o \ tty.o +elf_aux.o: $(ARCH_DIR)/kernel-offsets.h +CFLAGS_elf_aux.o += -I$(objtree)/arch/um + CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \ diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 4cca3e9..1399520 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -12,8 +12,9 @@ #include "init.h" #include "elf_user.h" #include "mem_user.h" +#include -#if ELF_CLASS == ELFCLASS32 +#if HOST_ELF_CLASS == ELFCLASS32 typedef Elf32_auxv_t elf_auxv_t; #else typedef Elf64_auxv_t elf_auxv_t; diff --git a/arch/um/sys-i386/kernel-offsets.c b/arch/um/sys-i386/kernel-offsets.c index 9f8ecd1..a1070af 100644 --- a/arch/um/sys-i386/kernel-offsets.c +++ b/arch/um/sys-i386/kernel-offsets.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #define DEFINE(sym, val) \ diff --git a/arch/um/sys-x86_64/kernel-offsets.c b/arch/um/sys-x86_64/kernel-offsets.c index 220e875..998541e 100644 --- a/arch/um/sys-x86_64/kernel-offsets.c +++ b/arch/um/sys-x86_64/kernel-offsets.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #define DEFINE(sym, val) \ -- cgit v1.1 From 9a0b3869bbf7cc66ee668515d4852c729158c0ca Mon Sep 17 00:00:00 2001 From: "viro@ZenIV.linux.org.uk" Date: Wed, 7 Sep 2005 23:21:11 +0100 Subject: [PATCH] bogus #if (arch/um/kernel/mem.c) Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/um/kernel/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/um') diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 5597bd3..64fa062 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -196,7 +196,7 @@ static void init_highmem(void) static void __init fixaddr_user_init( void) { -#if CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA long size = FIXADDR_USER_END - FIXADDR_USER_START; pgd_t *pgd; pud_t *pud; -- cgit v1.1